@@ -22016,6 +22016,7 @@ pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, si
2201622016 .partial_script = false,
2201722017 .command_start = true,
2201822018 .recovering = false,
22019+ .continuable = true,
2201922020 .encoding_locked = false,
2202022021 .encoding_changed = false,
2202122022 .pattern_matching_newlines = false,
@@ -22292,12 +22293,176 @@ pm_parser_free(pm_parser_t *parser) {
2229222293 }
2229322294}
2229422295
22296+ /**
22297+ * Returns true if the given diagnostic ID represents an error that cannot be
22298+ * fixed by appending more input. These are errors where the existing source
22299+ * contains definitively invalid syntax (as opposed to merely incomplete input).
22300+ */
22301+ static bool
22302+ pm_parse_err_is_fatal(pm_diagnostic_id_t diag_id) {
22303+ switch (diag_id) {
22304+ case PM_ERR_ARRAY_EXPRESSION_AFTER_STAR:
22305+ case PM_ERR_BEGIN_UPCASE_BRACE:
22306+ case PM_ERR_CLASS_VARIABLE_BARE:
22307+ case PM_ERR_END_UPCASE_BRACE:
22308+ case PM_ERR_ESCAPE_INVALID_HEXADECIMAL:
22309+ case PM_ERR_ESCAPE_INVALID_UNICODE_LIST:
22310+ case PM_ERR_ESCAPE_INVALID_UNICODE_SHORT:
22311+ case PM_ERR_EXPRESSION_NOT_WRITABLE:
22312+ case PM_ERR_EXPRESSION_NOT_WRITABLE_SELF:
22313+ case PM_ERR_FLOAT_PARSE:
22314+ case PM_ERR_GLOBAL_VARIABLE_BARE:
22315+ case PM_ERR_HASH_KEY:
22316+ case PM_ERR_HEREDOC_IDENTIFIER:
22317+ case PM_ERR_INSTANCE_VARIABLE_BARE:
22318+ case PM_ERR_INVALID_BLOCK_EXIT:
22319+ case PM_ERR_INVALID_ENCODING_MAGIC_COMMENT:
22320+ case PM_ERR_INVALID_FLOAT_EXPONENT:
22321+ case PM_ERR_INVALID_NUMBER_BINARY:
22322+ case PM_ERR_INVALID_NUMBER_DECIMAL:
22323+ case PM_ERR_INVALID_NUMBER_HEXADECIMAL:
22324+ case PM_ERR_INVALID_NUMBER_OCTAL:
22325+ case PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING:
22326+ case PM_ERR_NO_LOCAL_VARIABLE:
22327+ case PM_ERR_PARAMETER_ORDER:
22328+ case PM_ERR_STATEMENT_UNDEF:
22329+ case PM_ERR_VOID_EXPRESSION:
22330+ return true;
22331+ default:
22332+ return false;
22333+ }
22334+ }
22335+
22336+ /**
22337+ * Determine whether the source parsed by the given parser could become valid if
22338+ * more input were appended. This is used by tools like IRB to decide whether to
22339+ * prompt for continuation or to display an error.
22340+ *
22341+ * The parser starts with continuable=true. This function scans all errors to
22342+ * detect two categories of non-continuable errors:
22343+ *
22344+ * 1. Fatal errors: errors like invalid number literals or bare global variables
22345+ * that indicate definitively invalid syntax. These are only considered fatal
22346+ * if they occur before EOF (at EOF they could be from truncated input, e.g.
22347+ * `"\x` is an incomplete hex escape).
22348+ *
22349+ * 2. Stray tokens: unexpected_token_ignore and unexpected_token_close_context
22350+ * errors indicate tokens that don't belong. A stray token is a cascade
22351+ * effect (and does not prevent continuability) if:
22352+ *
22353+ * a. A non-stray, non-fatal error appeared earlier in the error list at a
22354+ * strictly earlier source position (the stray was caused by a preceding
22355+ * parse failure, e.g. a truncated heredoc), OR
22356+ * b. The stray token is at EOF, starts after position 0 (there is valid
22357+ * code before it), and either is a single byte (likely a truncated
22358+ * token like `\`) or there are non-stray errors elsewhere.
22359+ *
22360+ * Closing delimiters (`)`, `]`, `}`) at EOF are always genuinely stray —
22361+ * they are complete tokens and cannot become part of a longer valid
22362+ * construct by appending more input.
22363+ *
22364+ * c. The stray token is `=` at the start of a line, which could be the
22365+ * beginning of `=begin` (an embedded document). The remaining bytes
22366+ * after `=` may parse as an identifier, so the error is not at EOF,
22367+ * but the construct is genuinely incomplete.
22368+ */
22369+ static void
22370+ pm_parse_continuable(pm_parser_t *parser) {
22371+ // If there are no errors then there is nothing to continue.
22372+ if (parser->error_list.size == 0) {
22373+ parser->continuable = false;
22374+ return;
22375+ }
22376+
22377+ if (!parser->continuable) return;
22378+
22379+ size_t source_length = (size_t) (parser->end - parser->start);
22380+
22381+ // First pass: check if there are any non-stray, non-fatal errors.
22382+ bool has_non_stray_error = false;
22383+ for (pm_diagnostic_t *error = (pm_diagnostic_t *) parser->error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
22384+ if (error->diag_id != PM_ERR_UNEXPECTED_TOKEN_IGNORE && error->diag_id != PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT && !pm_parse_err_is_fatal(error->diag_id)) {
22385+ has_non_stray_error = true;
22386+ break;
22387+ }
22388+ }
22389+
22390+ // Second pass: check each error. We track the minimum source position
22391+ // among non-stray, non-fatal errors seen so far in list order, which
22392+ // lets us detect cascade stray tokens.
22393+ size_t non_stray_min_start = SIZE_MAX;
22394+
22395+ for (pm_diagnostic_t *error = (pm_diagnostic_t *) parser->error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
22396+ size_t error_start = (size_t) error->location.start;
22397+ size_t error_end = error_start + (size_t) error->location.length;
22398+ bool at_eof = error_end >= source_length;
22399+
22400+ // Fatal errors are non-continuable unless they occur at EOF.
22401+ if (pm_parse_err_is_fatal(error->diag_id) && !at_eof) {
22402+ parser->continuable = false;
22403+ return;
22404+ }
22405+
22406+ // Track non-stray, non-fatal error positions in list order.
22407+ if (error->diag_id != PM_ERR_UNEXPECTED_TOKEN_IGNORE &&
22408+ error->diag_id != PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT) {
22409+ if (error_start < non_stray_min_start) non_stray_min_start = error_start;
22410+ continue;
22411+ }
22412+
22413+ // This is a stray token. Determine if it is a cascade effect
22414+ // of a preceding error or genuinely stray.
22415+
22416+ // Rule (a): a non-stray error was seen earlier in the list at a
22417+ // strictly earlier position — this stray is a cascade effect.
22418+ if (non_stray_min_start < error_start) continue;
22419+
22420+ // Rule (b): this stray is at EOF with valid code before it.
22421+ // Single-byte stray tokens at EOF (like `\` for line continuation)
22422+ // are likely truncated tokens. Multi-byte stray tokens (like the
22423+ // keyword `end`) need additional evidence that they are cascade
22424+ // effects (i.e. non-stray errors exist elsewhere).
22425+ if (at_eof && error_start > 0) {
22426+ // Exception: closing delimiters at EOF are genuinely stray.
22427+ if (error->location.length == 1) {
22428+ const uint8_t *byte = parser->start + error_start;
22429+ if (*byte == ')' || *byte == ']' || *byte == '}') {
22430+ parser->continuable = false;
22431+ return;
22432+ }
22433+
22434+ // Single-byte non-delimiter stray at EOF: cascade.
22435+ continue;
22436+ }
22437+
22438+ // Multi-byte stray at EOF: cascade only if there are
22439+ // non-stray errors (evidence of a preceding parse failure).
22440+ if (has_non_stray_error) continue;
22441+ }
22442+
22443+ // Rule (c): a stray `=` at the start of a line could be the
22444+ // beginning of an embedded document (`=begin`). The remaining
22445+ // bytes after `=` parse as an identifier, so the error is not
22446+ // at EOF, but the construct is genuinely incomplete.
22447+ if (error->location.length == 1) {
22448+ const uint8_t *byte = parser->start + error_start;
22449+ if (*byte == '=' && (error_start == 0 || *(byte - 1) == '\n')) continue;
22450+ }
22451+
22452+ // This stray token is genuinely non-continuable.
22453+ parser->continuable = false;
22454+ return;
22455+ }
22456+ }
22457+
2229522458/**
2229622459 * Parse the Ruby source associated with the given parser and return the tree.
2229722460 */
2229822461PRISM_EXPORTED_FUNCTION pm_node_t *
2229922462pm_parse(pm_parser_t *parser) {
22300- return parse_program(parser);
22463+ pm_node_t *node = parse_program(parser);
22464+ pm_parse_continuable(parser);
22465+ return node;
2230122466}
2230222467
2230322468/**
0 commit comments