Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/actions/setup/directories/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ runs:
run: |
rm -f config.status .rbconfig.time \
Makefile GNUmakefile uncommon.mk enc.mk noarch-fake.rb
rm -f prism/.time prism/util/.time

- if: steps.which.outputs.sudo
shell: bash
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/wasm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ jobs:
../src/configure \
--host wasm32-unknown-wasi \
--with-baseruby=$PWD/../baseruby/install/bin/ruby \
--with-dump-ast=$PWD/../baseruby/dump_ast \
--with-static-linked-ext \
--with-ext=cgi/escape,continuation,coverage,date,digest/bubblebabble,digest,digest/md5,digest/rmd160,digest/sha1,digest/sha2,etc,fcntl,json,json/generator,json/parser,objspace,pathname,rbconfig/sizeof,ripper,stringio,strscan,monitor \
LDFLAGS=" \
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,7 @@ lcov*.info
/prism/serialize.c
/prism/token_type.c
/prism/srcs.mk
/dump_ast

# tool/update-NEWS-gemlist.rb
/bundled_gems.json
Expand Down
75 changes: 75 additions & 0 deletions benchmark/string_memsearch.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Benchmark definitions for short-needle substring search.
# The prelude builds haystacks and needles; the entries under `benchmark:`
# call String#index, String#include? and String#byteindex on them.
prelude: |
# Haystacks of various sizes
small_hay = "a" * 256
medium_hay = "a" * 4096
large_hay = "a" * 65536

# Short needles (2-8 bytes) that exercise rb_memsearch_ss
needle_2 = "xy"
needle_4 = "xyzw"
needle_8 = "xyzwabcd"

# Needle whose first byte is absent from the haystack (memchr fast-path)
# vs needle whose first byte is common (rolling hash comparison)
first_byte_absent = "x" + "a" * 3
first_byte_common = "a" + "x" * 3

# Haystack with match at the end
hay_match_end = "a" * 4095 + "xy"

# Haystack with match at the start
hay_match_start = "xy" + "a" * 4094

# Mixed content haystack (more realistic)
mixed_hay = (("abcdefghij" * 100) + "z") * 10

# Each key below names one snippet; every snippet uses only variables
# defined in the prelude above (plus one inline literal needle).
benchmark:
# === First byte absent from haystack (biggest win for rolling hash) ===
index_first_byte_absent_small: |
small_hay.index(first_byte_absent)
index_first_byte_absent_medium: |
medium_hay.index(first_byte_absent)
index_first_byte_absent_large: |
large_hay.index(first_byte_absent)

# === First byte common in haystack (stresses comparison loop) ===
index_first_byte_common_small: |
small_hay.index(first_byte_common)
index_first_byte_common_medium: |
medium_hay.index(first_byte_common)
index_first_byte_common_large: |
large_hay.index(first_byte_common)

# === Needle length variations (all absent) ===
index_needle_2_absent: |
medium_hay.index(needle_2)
index_needle_4_absent: |
medium_hay.index(needle_4)
index_needle_8_absent: |
medium_hay.index(needle_8)

# === Match at end of haystack ===
index_match_at_end: |
hay_match_end.index(needle_2)

# === Match at start of haystack ===
index_match_at_start: |
hay_match_start.index(needle_2)

# === include? (same code path) ===
include_first_byte_absent: |
medium_hay.include?(first_byte_absent)
include_first_byte_common: |
medium_hay.include?(first_byte_common)

# === byteindex ===
byteindex_first_byte_absent: |
medium_hay.byteindex(first_byte_absent)
byteindex_first_byte_common: |
medium_hay.byteindex(first_byte_common)

# === Mixed/realistic haystack ===
index_mixed_absent: |
mixed_hay.index(needle_4)
index_mixed_present: |
mixed_hay.index("ijab")
24 changes: 16 additions & 8 deletions common.mk
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,8 @@ MAKE_ENC = -f $(ENC_MK) V="$(V)" UNICODE_HDR_DIR="$(UNICODE_HDR_DIR)" \

PRISM_BUILD_DIR = prism

PRISM_FILES = prism/api_node.$(OBJEXT) \
prism/diagnostic.$(OBJEXT) \
LIBPRISM_OBJS = prism/diagnostic.$(OBJEXT) \
prism/encoding.$(OBJEXT) \
prism/extension.$(OBJEXT) \
prism/node.$(OBJEXT) \
prism/options.$(OBJEXT) \
prism/prettyprint.$(OBJEXT) \
Expand All @@ -112,9 +110,14 @@ PRISM_FILES = prism/api_node.$(OBJEXT) \
prism/util/pm_string.$(OBJEXT) \
prism/util/pm_strncasecmp.$(OBJEXT) \
prism/util/pm_strpbrk.$(OBJEXT) \
prism/prism.$(OBJEXT) \
prism/prism.$(OBJEXT)

EXTPRISM_OBJS = prism/api_node.$(OBJEXT) \
prism/extension.$(OBJEXT) \
prism_init.$(OBJEXT)

PRISM_OBJS = $(LIBPRISM_OBJS) $(EXTPRISM_OBJS)

COMMONOBJS = \
array.$(OBJEXT) \
ast.$(OBJEXT) \
Expand Down Expand Up @@ -192,7 +195,7 @@ COMMONOBJS = \
vm_sync.$(OBJEXT) \
vm_trace.$(OBJEXT) \
weakmap.$(OBJEXT) \
$(PRISM_FILES) \
$(PRISM_OBJS) \
$(YJIT_OBJ) \
$(ZJIT_OBJ) \
$(JIT_OBJ) \
Expand All @@ -203,7 +206,7 @@ COMMONOBJS = \
$(BUILTIN_TRANSOBJS) \
$(MISSING)

$(PRISM_FILES): $(PRISM_BUILD_DIR)/.time $(PRISM_BUILD_DIR)/util/.time
$(PRISM_OBJS): $(PRISM_BUILD_DIR)/.time $(PRISM_BUILD_DIR)/util/.time

$(PRISM_BUILD_DIR)/.time $(PRISM_BUILD_DIR)/util/.time:
$(Q) $(MAKEDIRS) $(@D)
Expand Down Expand Up @@ -1292,7 +1295,8 @@ preludes: {$(VPATH)}miniprelude.c

{$(srcdir)}.rb.rbinc:
$(ECHO) making $@
$(Q) $(BASERUBY) $(tooldir)/mk_builtin_loader.rb $(SRC_FILE)
-$(Q) $(MAKE) $(DUMP_AST)
$(Q) $(BASERUBY) $(tooldir)/mk_builtin_loader.rb $(DUMP_AST) $(SRC_FILE)

$(BUILTIN_BINARY:yes=built)in_binary.rbbin: $(PREP) $(BUILTIN_RB_SRCS) $(srcdir)/template/builtin_binary.rbbin.tmpl
$(Q) $(MINIRUBY) $(tooldir)/generic_erb.rb -o $@ \
Expand All @@ -1302,7 +1306,11 @@ $(BUILTIN_BINARY:yes=built)in_binary.rbbin: $(PREP) $(BUILTIN_RB_SRCS) $(srcdir)
$(BUILTIN_BINARY:no=builtin)_binary.rbbin:
$(Q) echo> $@ // empty $(@F)

$(BUILTIN_RB_INCS): $(top_srcdir)/tool/mk_builtin_loader.rb
$(BUILTIN_RB_INCS): $(tooldir)/mk_builtin_loader.rb

dump_ast$(EXEEXT): $(tooldir)/dump_ast.c $(LIBPRISM_OBJS)
$(ECHO) compiling $@
$(Q) $(CC) $(CFLAGS) $(OUTFLAG)$@ $(INCFLAGS) $(tooldir)/dump_ast.c $(LIBPRISM_OBJS)

$(srcdir)/revision.h$(no_baseruby:no=~disabled~): $(REVISION_H)

Expand Down
1 change: 1 addition & 0 deletions compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -5375,6 +5375,7 @@ compile_hash(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *node, int meth
}
VALUE hash = rb_hash_new_with_size(RARRAY_LEN(ary) / 2);
rb_hash_bulk_insert(RARRAY_LEN(ary), RARRAY_CONST_PTR(ary), hash);
RB_GC_GUARD(ary);
hash = RB_OBJ_SET_FROZEN_SHAREABLE(rb_obj_hide(hash));

/* Emit optimized code */
Expand Down
6 changes: 6 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,12 @@ AS_IF([test "$HAVE_BASERUBY" = no], [
AC_SUBST(BASERUBY)
AC_SUBST(HAVE_BASERUBY)

AC_ARG_WITH(dump-ast,
AS_HELP_STRING([--with-dump-ast=DUMP_AST], [use DUMP_AST as dump_ast; for cross-compiling with a host-built dump_ast]),
[DUMP_AST=$withval],
[DUMP_AST='./dump_ast$(EXEEXT)'])
AC_SUBST(DUMP_AST)

: ${GIT=git}
HAVE_GIT=yes
AC_ARG_WITH(git,
Expand Down
4 changes: 4 additions & 0 deletions defs/gmake.mk
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,10 @@ ifneq ($(DOT_WAIT),)
up:: $(DOT_WAIT) after-update
endif

ifneq ($(CC),false)
$(BUILTIN_RB_INCS): $(DUMP_AST)
endif

ifneq ($(filter update-bundled_gems refresh-gems,$(MAKECMDGOALS)),)
update-gems: update-bundled_gems
endif
Expand Down
24 changes: 24 additions & 0 deletions depend
Original file line number Diff line number Diff line change
Expand Up @@ -3639,6 +3639,30 @@ dmydln.$(OBJEXT): {$(VPATH)}st.h
dmydln.$(OBJEXT): {$(VPATH)}subst.h
dmyenc.$(OBJEXT): {$(VPATH)}dmyenc.c
dmyext.$(OBJEXT): {$(VPATH)}dmyext.c
dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/ast.h
dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/defines.h
dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/diagnostic.h
dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/encoding.h
dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/node.h
dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/options.h
dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/parser.h
dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/prettyprint.h
dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/prism.h
dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/regexp.h
dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/static_literals.h
dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/util/pm_arena.h
dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/util/pm_buffer.h
dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/util/pm_char.h
dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/util/pm_constant_pool.h
dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/util/pm_integer.h
dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/util/pm_line_offset_list.h
dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/util/pm_list.h
dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/util/pm_memchr.h
dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/util/pm_string.h
dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/util/pm_strncasecmp.h
dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/util/pm_strpbrk.h
dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/prism/version.h
dump_ast-dump_ast.$(OBJEXT): $(top_srcdir)/tool/dump_ast.c
enc/ascii.$(OBJEXT): $(hdrdir)/ruby/ruby.h
enc/ascii.$(OBJEXT): {$(VPATH)}assert.h
enc/ascii.$(OBJEXT): {$(VPATH)}backward/2/assume.h
Expand Down
4 changes: 4 additions & 0 deletions prism/templates/src/node.c.erb
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,11 @@ pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *no

// Dump the <%= field.name %> field
pm_buffer_append_byte(buffer, ',');
<%- if field.is_a?(Prism::Template::Flags) -%>
pm_buffer_append_string(buffer, "\"flags\":", 8);
<%- else -%>
pm_buffer_append_string(buffer, "\"<%= field.name %>\":", <%= field.name.bytesize + 3 %>);
<%- end -%>
<%- case field -%>
<%- when Prism::Template::NodeField -%>
pm_dump_json(buffer, parser, (const pm_node_t *) cast-><%= field.name %>);
Expand Down
4 changes: 1 addition & 3 deletions prism/templates/src/serialize.c.erb
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,6 @@ static void
pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
pm_buffer_append_byte(buffer, (uint8_t) PM_NODE_TYPE(node));

size_t offset = buffer->length;

<%- if Prism::Template::INCLUDE_NODE_ID -%>
pm_buffer_append_varuint(buffer, node->node_id);
<%- end -%>
Expand Down Expand Up @@ -126,7 +124,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
<%- end -%>
<%- if node.needs_serialized_length? -%>
// serialize length
uint32_t length = pm_sizet_to_u32(buffer->length - offset - sizeof(uint32_t));
uint32_t length = pm_sizet_to_u32(buffer->length - length_offset);
memcpy(buffer->value + length_offset, &length, sizeof(uint32_t));
<%- end -%>
break;
Expand Down
2 changes: 1 addition & 1 deletion prism/util/pm_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ void pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length);
* @param source The source of the string.
* @param length The length of the string.
*/
void pm_string_constant_init(pm_string_t *string, const char *source, size_t length);
PRISM_EXPORTED_FUNCTION void pm_string_constant_init(pm_string_t *string, const char *source, size_t length);

/**
* Represents the result of calling pm_string_mapped_init or
Expand Down
2 changes: 2 additions & 0 deletions prism_compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -863,6 +863,7 @@ pm_static_literal_value(rb_iseq_t *iseq, const pm_node_t *node, pm_scope_node_t

VALUE value = rb_hash_new_with_size(elements->size);
rb_hash_bulk_insert(RARRAY_LEN(array), RARRAY_CONST_PTR(array), value);
RB_GC_GUARD(array);

value = rb_obj_hide(value);
RB_OBJ_SET_FROZEN_SHAREABLE(value);
Expand Down Expand Up @@ -1533,6 +1534,7 @@ pm_compile_hash_elements(rb_iseq_t *iseq, const pm_node_t *node, const pm_node_l

VALUE hash = rb_hash_new_with_size(RARRAY_LEN(ary) / 2);
rb_hash_bulk_insert(RARRAY_LEN(ary), RARRAY_CONST_PTR(ary), hash);
RB_GC_GUARD(ary);
hash = rb_obj_hide(hash);
RB_OBJ_SET_FROZEN_SHAREABLE(hash);

Expand Down
2 changes: 1 addition & 1 deletion re.c
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ rb_memcicmp(const void *x, const void *y, long len)
return 0;
}

#ifdef HAVE_MEMMEM
#if defined(HAVE_MEMMEM) && !defined(__APPLE__)
static inline long
rb_memsearch_ss(const unsigned char *xs, long m, const unsigned char *ys, long n)
{
Expand Down
1 change: 1 addition & 0 deletions template/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ CONFIGURE = @CONFIGURE@
MKFILES = @MAKEFILES@
BASERUBY = @BASERUBY@
HAVE_BASERUBY = @HAVE_BASERUBY@
DUMP_AST = @DUMP_AST@
TEST_RUNNABLE = @TEST_RUNNABLE@
CROSS_COMPILING = @CROSS_COMPILING@
DOXYGEN = @DOXYGEN@
Expand Down
2 changes: 1 addition & 1 deletion test/ruby/test_gc.rb
Original file line number Diff line number Diff line change
Expand Up @@ -897,7 +897,7 @@ def test_old_to_young_reference
end

def test_finalizer_not_run_with_vm_lock
assert_ractor(<<~'RUBY')
assert_ractor(<<~'RUBY', timeout: 30)
Thread.new do
loop do
Encoding.list.each do |enc|
Expand Down
64 changes: 64 additions & 0 deletions tool/dump_ast.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#include <stdlib.h>
#include <string.h>

/*
 * Allocator shims.
 *
 * Prism built as part of CRuby has its xmalloc/xfree/etc. macros pointed at
 * ruby_xmalloc/ruby_xfree/etc. This program is standalone but links against
 * those very object files, so it has to supply the ruby_x* symbols itself.
 * Each one simply forwards to the matching C library allocator.
 */

/* Allocate `size` bytes; forwards to malloc(). */
void *
ruby_xmalloc(size_t size)
{
    return malloc(size);
}

/* Allocate a zeroed array of `nelems` items of `elemsiz` bytes; forwards to calloc(). */
void *
ruby_xcalloc(size_t nelems, size_t elemsiz)
{
    return calloc(nelems, elemsiz);
}

/* Resize the allocation at `ptr` to `newsiz` bytes; forwards to realloc(). */
void *
ruby_xrealloc(void *ptr, size_t newsiz)
{
    return realloc(ptr, newsiz);
}

/* Release an allocation obtained from the shims above; forwards to free(). */
void
ruby_xfree(void *ptr)
{
    free(ptr);
}

#include "prism.h"

/*
 * Parse the Ruby source file named on the command line with prism and print
 * its AST as a single line of JSON on stdout.
 *
 * Usage: dump_ast <filename>
 *
 * Returns EXIT_SUCCESS when the file parsed without errors and the JSON was
 * written out. Returns EXIT_FAILURE when the argument count is wrong, the
 * file cannot be mapped, the parser recorded errors (each is reported on
 * stderr as "line:column:message"), or writing to stdout fails.
 */
int
main(int argc, const char *argv[]) {
    if (argc != 2) {
        fprintf(stderr, "Usage: %s <filename>\n", argv[0]);
        return EXIT_FAILURE;
    }

    const char *filepath = argv[1];
    pm_string_t input;

    /* Nothing has been allocated yet, so a plain return is enough here. */
    if (pm_string_mapped_init(&input, filepath) != PM_STRING_INIT_SUCCESS) {
        fprintf(stderr, "unable to map file: %s\n", filepath);
        return EXIT_FAILURE;
    }

    pm_options_t options = { 0 };
    pm_options_line_set(&options, 1);
    pm_options_filepath_set(&options, filepath);

    pm_arena_t arena = { 0 };
    pm_parser_t parser;
    pm_parser_init(&arena, &parser, pm_string_source(&input), pm_string_length(&input), &options);

    pm_node_t *node = pm_parse(&parser);
    int exit_status;

    if (parser.error_list.size > 0) {
        fprintf(stderr, "error parsing %s\n", filepath);
        for (const pm_diagnostic_t *diagnostic = (const pm_diagnostic_t *) parser.error_list.head; diagnostic != NULL; diagnostic = (const pm_diagnostic_t *) diagnostic->node.next) {
            const pm_line_column_t line_column = pm_line_offset_list_line_column(&parser.line_offsets, diagnostic->location.start, parser.start_line);
            fprintf(stderr, "%" PRIi32 ":%" PRIu32 ":%s\n", line_column.line, line_column.column, diagnostic->message);
        }
        exit_status = EXIT_FAILURE;
    } else {
        pm_buffer_t json = { 0 };
        pm_dump_json(&json, &parser, node);

        /*
         * Emit the buffer by its exact byte length. The previous
         * printf("%.*s") form narrowed the size_t length to int (which can
         * truncate or turn negative for very large buffers) and would stop
         * at the first NUL byte. Write errors are also surfaced through the
         * exit status instead of being ignored.
         */
        const char *json_value = pm_buffer_value(&json);
        const size_t json_length = pm_buffer_length(&json);

        exit_status = EXIT_SUCCESS;
        if ((json_length > 0 && fwrite(json_value, 1, json_length, stdout) != json_length) ||
            fputc('\n', stdout) == EOF ||
            fflush(stdout) == EOF) {
            fprintf(stderr, "unable to write AST for %s\n", filepath);
            exit_status = EXIT_FAILURE;
        }

        pm_buffer_free(&json);
    }

    /* Release everything set up above, on both the success and error paths. */
    pm_parser_free(&parser);
    pm_arena_free(&arena);
    pm_string_free(&input);
    pm_options_free(&options);

    return exit_status;
}
Loading