Skip to content

Commit 9ecac8d

Browse files
kddnewton authored and matzbot committed
[ruby/prism] Fix the handling of do on commands
Introduce PM_TOKEN_KEYWORD_DO_BLOCK to distinguish do-blocks on command-style calls from regular `do` keywords. Add parse_command_do_block to attach these blocks to call nodes. Track in_endless_def_body to prevent do-block consumption inside endless method definitions, allowing blocks to correctly bubble up to outer contexts like `private def f = puts "Hello" do end`. (Upstream commit: ruby/prism@7d17fd254b)
1 parent 980bc39 commit 9ecac8d

File tree

10 files changed

+104
-17
lines changed

10 files changed

+104
-17
lines changed

lib/prism/lex_compat.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ def deconstruct_keys(keys) # :nodoc:
134134
KEYWORD_DEF: :on_kw,
135135
KEYWORD_DEFINED: :on_kw,
136136
KEYWORD_DO: :on_kw,
137+
KEYWORD_DO_BLOCK: :on_kw,
137138
KEYWORD_DO_LOOP: :on_kw,
138139
KEYWORD_ELSE: :on_kw,
139140
KEYWORD_ELSIF: :on_kw,

lib/prism/translation/parser/lexer.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ class Lexer # :nodoc:
8787
KEYWORD_DEF: :kDEF,
8888
KEYWORD_DEFINED: :kDEFINED,
8989
KEYWORD_DO: :kDO,
90+
KEYWORD_DO_BLOCK: :kDO_BLOCK,
9091
KEYWORD_DO_LOOP: :kDO_COND,
9192
KEYWORD_END: :kEND,
9293
KEYWORD_END_UPCASE: :klEND,

prism/config.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,8 @@ tokens:
494494
comment: "def"
495495
- name: KEYWORD_DEFINED
496496
comment: "defined?"
497+
- name: KEYWORD_DO_BLOCK
498+
comment: "do keyword for a block attached to a command"
497499
- name: KEYWORD_DO_LOOP
498500
comment: "do keyword for a predicate in a while, until, or for loop"
499501
- name: KEYWORD_END_UPCASE

prism/parser.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -885,6 +885,13 @@ struct pm_parser {
885885
/** Whether or not we're at the beginning of a command. */
886886
bool command_start;
887887

888+
/**
889+
* Whether or not we're currently parsing the body of an endless method
890+
* definition. In this context, PM_TOKEN_KEYWORD_DO_BLOCK should not be
891+
* consumed by commands (it should bubble up to the outer context).
892+
*/
893+
bool in_endless_def_body;
894+
888895
/** Whether or not we're currently recovering from a syntax error. */
889896
bool recovering;
890897

prism/prism.c

Lines changed: 72 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8330,9 +8330,15 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) {
83308330
switch (width) {
83318331
case 2:
83328332
if (lex_keyword(parser, current_start, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
8333+
if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
8334+
return PM_TOKEN_KEYWORD_DO;
8335+
}
83338336
if (pm_do_loop_stack_p(parser)) {
83348337
return PM_TOKEN_KEYWORD_DO_LOOP;
83358338
}
8339+
if (!pm_accepts_block_stack_p(parser)) {
8340+
return PM_TOKEN_KEYWORD_DO_BLOCK;
8341+
}
83368342
return PM_TOKEN_KEYWORD_DO;
83378343
}
83388344

@@ -12497,6 +12503,7 @@ token_begins_expression_p(pm_token_type_t type) {
1249712503
case PM_TOKEN_EOF:
1249812504
case PM_TOKEN_LAMBDA_BEGIN:
1249912505
case PM_TOKEN_KEYWORD_DO:
12506+
case PM_TOKEN_KEYWORD_DO_BLOCK:
1250012507
case PM_TOKEN_KEYWORD_DO_LOOP:
1250112508
case PM_TOKEN_KEYWORD_END:
1250212509
case PM_TOKEN_KEYWORD_ELSE:
@@ -14825,6 +14832,27 @@ parse_block(pm_parser_t *parser, uint16_t depth) {
1482514832
return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
1482614833
}
1482714834

14835+
/**
14836+
* Attach a do-block (PM_TOKEN_KEYWORD_DO_BLOCK) to a command-style call node.
14837+
* The current token must be PM_TOKEN_KEYWORD_DO_BLOCK when this is called.
14838+
*/
14839+
static void
14840+
parse_command_do_block(pm_parser_t *parser, pm_call_node_t *call, uint16_t depth) {
14841+
parser_lex(parser);
14842+
pm_block_node_t *block = parse_block(parser, (uint16_t) (depth + 1));
14843+
14844+
if (call->block != NULL) {
14845+
pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_BLOCK_MULTI);
14846+
if (call->arguments == NULL) {
14847+
call->arguments = pm_arguments_node_create(parser);
14848+
}
14849+
pm_arguments_node_arguments_append(parser->arena, call->arguments, call->block);
14850+
}
14851+
14852+
call->block = UP(block);
14853+
PM_NODE_LENGTH_SET_NODE(call, block);
14854+
}
14855+
1482814856
/**
1482914857
* Parse a list of arguments and their surrounding parentheses if they are
1483014858
* present. It returns true if it found any pieces of arguments (parentheses,
@@ -14833,6 +14861,7 @@ parse_block(pm_parser_t *parser, uint16_t depth) {
1483314861
static bool
1483414862
parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, bool accepts_command_call, uint16_t depth) {
1483514863
bool found = false;
14864+
bool parsed_command_args = false;
1483614865

1483714866
if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
1483814867
found |= true;
@@ -14855,6 +14884,7 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
1485514884
}
1485614885
} else if (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
1485714886
found |= true;
14887+
parsed_command_args = true;
1485814888
pm_accepts_block_stack_push(parser, false);
1485914889

1486014890
// If we get here, then the subsequent token cannot be used as an infix
@@ -14885,6 +14915,9 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
1488514915
} else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
1488614916
found |= true;
1488714917
block = parse_block(parser, (uint16_t) (depth + 1));
14918+
} else if (parsed_command_args && pm_accepts_block_stack_p(parser) && !parser->in_endless_def_body && accept1(parser, PM_TOKEN_KEYWORD_DO_BLOCK)) {
14919+
found |= true;
14920+
block = parse_block(parser, (uint16_t) (depth + 1));
1488814921
}
1488914922

1489014923
if (block != NULL) {
@@ -15300,7 +15333,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl
1530015333
#define PM_CASE_KEYWORD PM_TOKEN_KEYWORD___ENCODING__: case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: \
1530115334
case PM_TOKEN_KEYWORD_ALIAS: case PM_TOKEN_KEYWORD_AND: case PM_TOKEN_KEYWORD_BEGIN: case PM_TOKEN_KEYWORD_BEGIN_UPCASE: \
1530215335
case PM_TOKEN_KEYWORD_BREAK: case PM_TOKEN_KEYWORD_CASE: case PM_TOKEN_KEYWORD_CLASS: case PM_TOKEN_KEYWORD_DEF: \
15303-
case PM_TOKEN_KEYWORD_DEFINED: case PM_TOKEN_KEYWORD_DO: case PM_TOKEN_KEYWORD_DO_LOOP: case PM_TOKEN_KEYWORD_ELSE: \
15336+
case PM_TOKEN_KEYWORD_DEFINED: case PM_TOKEN_KEYWORD_DO: case PM_TOKEN_KEYWORD_DO_BLOCK: case PM_TOKEN_KEYWORD_DO_LOOP: case PM_TOKEN_KEYWORD_ELSE: \
1530415337
case PM_TOKEN_KEYWORD_ELSIF: case PM_TOKEN_KEYWORD_END: case PM_TOKEN_KEYWORD_END_UPCASE: case PM_TOKEN_KEYWORD_ENSURE: \
1530515338
case PM_TOKEN_KEYWORD_FALSE: case PM_TOKEN_KEYWORD_FOR: case PM_TOKEN_KEYWORD_IF: case PM_TOKEN_KEYWORD_IN: \
1530615339
case PM_TOKEN_KEYWORD_MODULE: case PM_TOKEN_KEYWORD_NEXT: case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_NOT: \
@@ -17486,7 +17519,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
1748617519
element = UP(pm_keyword_hash_node_create(parser));
1748717520
pm_static_literals_t hash_keys = { 0 };
1748817521

17489-
if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
17522+
if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_DO_BLOCK, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
1749017523
parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
1749117524
}
1749217525

@@ -18895,20 +18928,30 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
1889518928
allow_command_call = binding_power == PM_BINDING_POWER_ASSIGNMENT || binding_power < PM_BINDING_POWER_COMPOSITION;
1889618929
}
1889718930

18931+
// Inside a def body, we push true onto the
18932+
// accepts_block_stack so that `do` is lexed as
18933+
// PM_TOKEN_KEYWORD_DO (which can only start a block for
18934+
// primary-level constructs, not commands). During command
18935+
// argument parsing, the stack is pushed to false, causing
18936+
// `do` to be lexed as PM_TOKEN_KEYWORD_DO_BLOCK, which
18937+
// is not consumed inside the endless def body and instead
18938+
// left for the outer context.
18939+
pm_accepts_block_stack_push(parser, true);
18940+
bool previous_in_endless_def_body = parser->in_endless_def_body;
18941+
parser->in_endless_def_body = true;
1889818942
pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, allow_command_call, false, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
18943+
parser->in_endless_def_body = previous_in_endless_def_body;
18944+
pm_accepts_block_stack_pop(parser);
1889918945

18900-
// In an endless method definition, the body is not allowed to
18901-
// be a command with a do..end block.
18902-
if (PM_NODE_TYPE_P(statement, PM_CALL_NODE)) {
18903-
pm_call_node_t *call = (pm_call_node_t *) statement;
18904-
18905-
if (call->arguments != NULL && call->block != NULL && PM_NODE_TYPE_P(call->block, PM_BLOCK_NODE)) {
18906-
pm_block_node_t *block = (pm_block_node_t *) call->block;
18907-
18908-
if (parser->start[block->opening_loc.start] != '{') {
18909-
pm_parser_err_node(parser, call->block, PM_ERR_DEF_ENDLESS_DO_BLOCK);
18910-
}
18911-
}
18946+
// If an unconsumed PM_TOKEN_KEYWORD_DO follows the body,
18947+
// it is an error (e.g., `def f = 1 do end`).
18948+
// PM_TOKEN_KEYWORD_DO_BLOCK is intentionally not caught
18949+
// here — it should bubble up to the outer context (e.g.,
18950+
// `private def f = puts "Hello" do end` where the block
18951+
// attaches to `private`).
18952+
if (accept1(parser, PM_TOKEN_KEYWORD_DO)) {
18953+
pm_block_node_t *block = parse_block(parser, (uint16_t) (depth + 1));
18954+
pm_parser_err_node(parser, UP(block), PM_ERR_DEF_ENDLESS_DO_BLOCK);
1891218955
}
1891318956

1891418957
if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
@@ -20066,9 +20109,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
2006620109
opening = parser->previous;
2006720110

2006820111
if (!match3(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20069-
pm_accepts_block_stack_push(parser, true);
2007020112
body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1)));
20071-
pm_accepts_block_stack_pop(parser);
2007220113
}
2007320114

2007420115
if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
@@ -21518,6 +21559,14 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
2151821559
}
2151921560
break;
2152021561
case PM_CALL_NODE:
21562+
// A do-block can attach to a command-style call at the
21563+
// primary level. Inside an endless def body, DO_BLOCK must
21564+
// not be consumed so it can bubble up to the outer context
21565+
// (e.g., `private` in `private def f = bar baz do end`).
21566+
if (match1(parser, PM_TOKEN_KEYWORD_DO_BLOCK) && !parser->in_endless_def_body && pm_accepts_block_stack_p(parser) && pm_call_node_command_p((pm_call_node_t *) node)) {
21567+
parse_command_do_block(parser, (pm_call_node_t *) node, depth);
21568+
}
21569+
2152121570
// If we have a call node, then we need to check if it looks like a
2152221571
// method call without parentheses that contains arguments. If it
2152321572
// does, then it has different rules for parsing infix operators,
@@ -21573,6 +21622,13 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
2157321622
}
2157421623
break;
2157521624
case PM_CALL_NODE:
21625+
// A do-block can attach to a command-style call
21626+
// produced by infix operators (e.g., dot-calls like
21627+
// `obj.method args do end`).
21628+
if (match1(parser, PM_TOKEN_KEYWORD_DO_BLOCK) && !parser->in_endless_def_body && pm_accepts_block_stack_p(parser) && pm_call_node_command_p((pm_call_node_t *) node)) {
21629+
parse_command_do_block(parser, (pm_call_node_t *) node, depth);
21630+
}
21631+
2157621632
// These expressions are also statements, by virtue of the
2157721633
// right-hand side of the expression (i.e., the last argument to
2157821634
// the call node) being an implicit array.

prism/templates/src/token_type.c.erb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,8 @@ pm_token_type_human(pm_token_type_t token_type) {
167167
return "'defined?'";
168168
case PM_TOKEN_KEYWORD_DO:
169169
return "'do'";
170+
case PM_TOKEN_KEYWORD_DO_BLOCK:
171+
return "'do'";
170172
case PM_TOKEN_KEYWORD_DO_LOOP:
171173
return "'do'";
172174
case PM_TOKEN_KEYWORD_ELSE:
Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
11
def a = a b do 1 end
2-
^~~~~~~~ unexpected `do` for block in an endless method definition
2+
^~ unexpected 'do', expecting end-of-input
3+
^~ unexpected 'do', ignoring it
4+
^~~ unexpected 'end', expecting end-of-input
5+
^~~ unexpected 'end', ignoring it
36

test/prism/fixtures/4.0/endless_methods_command_call.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,6 @@ private def foo(x) = puts x
66
private def obj.foo = puts "Hello"
77
private def obj.foo() = puts "Hello"
88
private def obj.foo(x) = puts x
9+
10+
private def foo = bar baz
11+
private def foo = bar baz do expr end

test/prism/fixtures/blocks.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,3 +52,11 @@ foo lambda { |
5252
}
5353

5454
foo do |bar,| end
55+
56+
foo bar baz, qux do end
57+
58+
foo.bar baz do end
59+
60+
foo.bar baz do end.qux quux do end
61+
62+
foo bar, baz do |x| x end

test/prism/fixtures/endless_methods.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,7 @@ def bar = A ""
55
def method = 1 + 2 + 3
66

77
x = def f = p 1
8+
9+
def foo = bar baz
10+
11+
def foo = bar(baz)

0 commit comments

Comments
 (0)