diff --git a/Doc/library/tulip_coro.dia b/Doc/library/tulip_coro.dia deleted file mode 100644 index 70a33e3c00cf6e..00000000000000 Binary files a/Doc/library/tulip_coro.dia and /dev/null differ diff --git a/Doc/library/tulip_coro.png b/Doc/library/tulip_coro.png deleted file mode 100644 index aad41c93015d09..00000000000000 Binary files a/Doc/library/tulip_coro.png and /dev/null differ diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index dd1bf2d1d2b51a..9f626cecd13630 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1218,14 +1218,14 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [LOAD_FAST_AND_CLEAR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [LOAD_FAST_BORROW] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG }, [LOAD_FAST_BORROW_LOAD_FAST_BORROW] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, - [LOAD_FAST_CHECK] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [LOAD_FAST_CHECK] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG }, [LOAD_FAST_LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [LOAD_FROM_DICT_OR_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [LOAD_FROM_DICT_OR_GLOBALS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [LOAD_GLOBAL] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_GLOBAL_BUILTIN] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, [LOAD_GLOBAL_MODULE] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, - [LOAD_LOCALS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [LOAD_LOCALS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG }, [LOAD_NAME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_SMALL_INT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [LOAD_SPECIAL] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, @@ -1250,7 +1250,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [POP_TOP] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG | HAS_PURE_FLAG }, [PUSH_EXC_INFO] = { true, INSTR_FMT_IX, 0 }, [PUSH_NULL] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, - [RAISE_VARARGS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [RAISE_VARARGS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG }, [RERAISE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [RESERVED] = { true, INSTR_FMT_IX, 0 }, [RESUME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 3348ce0f204f5a..d6afdb0b4b5805 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -24,7 +24,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CHECK_PERIODIC] = HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CHECK_PERIODIC_IF_NOT_YIELD_FROM] = HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_RESUME_CHECK] = HAS_DEOPT_FLAG, - [_LOAD_FAST_CHECK] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_LOAD_FAST_CHECK] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG, [_LOAD_FAST_0] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, [_LOAD_FAST_1] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, [_LOAD_FAST_2] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, @@ -148,7 +148,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_DELETE_ATTR] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_GLOBAL] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_DELETE_GLOBAL] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, - [_LOAD_LOCALS] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_LOAD_LOCALS] = HAS_ERROR_FLAG, [_LOAD_NAME] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_LOAD_GLOBAL] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_PUSH_NULL_CONDITIONAL] = HAS_ARG_FLAG, @@ -308,7 +308,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_SWAP] = HAS_ARG_FLAG | HAS_PURE_FLAG, [_GUARD_IS_TRUE_POP] = HAS_EXIT_FLAG, [_GUARD_IS_FALSE_POP] = HAS_EXIT_FLAG, - [_GUARD_IS_NONE_POP] = HAS_EXIT_FLAG | HAS_ESCAPES_FLAG, + [_GUARD_IS_NONE_POP] = HAS_EXIT_FLAG, [_GUARD_IS_NOT_NONE_POP] = HAS_EXIT_FLAG | HAS_ESCAPES_FLAG, [_JUMP_TO_TOP] = 0, [_SET_IP] = 0, @@ -330,7 +330,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_CONST_UNDER_INLINE] = 0, [_LOAD_CONST_UNDER_INLINE_BORROW] = 0, [_CHECK_FUNCTION] = HAS_DEOPT_FLAG, - [_START_EXECUTOR] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, + [_START_EXECUTOR] = HAS_DEOPT_FLAG, [_MAKE_WARM] = 0, [_FATAL_ERROR] = 0, [_DEOPT] = 0, diff --git a/Lib/test/test_ast/test_ast.py b/Lib/test/test_ast/test_ast.py index 13dcb5238945b6..1e6f60074308e2 100644 --- a/Lib/test/test_ast/test_ast.py +++ b/Lib/test/test_ast/test_ast.py @@ -220,6 +220,131 @@ def test_negative_locations_for_compile(self): # This also must not crash: ast.parse(tree, optimize=2) + def test_docstring_optimization_single_node(self): + # https://github.com/python/cpython/issues/137308 + class_example1 = textwrap.dedent(''' + class A: + """Docstring""" + ''') + class_example2 = textwrap.dedent(''' + class A: + """ + Docstring""" + ''') + def_example1 = textwrap.dedent(''' + def some(): + """Docstring""" + ''') + def_example2 = textwrap.dedent(''' + def some(): + """Docstring + """ + ''') + async_def_example1 = textwrap.dedent(''' + async def some(): + """Docstring""" + ''') + async_def_example2 = textwrap.dedent(''' + async def some(): + """ + Docstring + """ + ''') + for code in [ + class_example1, + class_example2, + def_example1, + def_example2, + async_def_example1, + async_def_example2, + ]: + for opt_level in [0, 1, 2]: + with self.subTest(code=code, opt_level=opt_level): + mod = ast.parse(code, optimize=opt_level) + self.assertEqual(len(mod.body[0].body), 1) + if opt_level == 2: + pass_stmt = mod.body[0].body[0] + self.assertIsInstance(pass_stmt, ast.Pass) + self.assertEqual( + vars(pass_stmt), + { + 'lineno': 3, + 'col_offset': 4, + 'end_lineno': 3, + 'end_col_offset': 8, + }, + ) + else: + self.assertIsInstance(mod.body[0].body[0], ast.Expr) + self.assertIsInstance( + mod.body[0].body[0].value, + ast.Constant, + ) + + compile(code, "a", "exec") + compile(code, "a", "exec", optimize=opt_level) + compile(mod, "a", "exec") + compile(mod, "a", "exec", optimize=opt_level) + + def test_docstring_optimization_multiple_nodes(self): + # https://github.com/python/cpython/issues/137308 + class_example = textwrap.dedent( + """ + class A: + ''' + Docstring + ''' + x = 1 + """ + ) + + def_example = textwrap.dedent( + """ + def some(): + ''' + Docstring + + ''' + x = 1 + """ + ) + + async_def_example = textwrap.dedent( + """ + async def some(): + + '''Docstring + + ''' + x = 1 + """ + ) + + for code in [ + class_example, + def_example, + async_def_example, + ]: + for opt_level in [0, 1, 2]: + with self.subTest(code=code, opt_level=opt_level): + mod = ast.parse(code, optimize=opt_level) + if opt_level == 2: + self.assertNotIsInstance( + mod.body[0].body[0], + (ast.Pass, ast.Expr), + ) + else: + self.assertIsInstance(mod.body[0].body[0], ast.Expr) + self.assertIsInstance( + mod.body[0].body[0].value, + ast.Constant, + ) + + compile(code, "a", "exec") + compile(code, "a", "exec", optimize=opt_level) + compile(mod, "a", "exec") + compile(mod, "a", "exec", optimize=opt_level) + def test_slice(self): slc = ast.parse("x[::]").body[0].value.slice self.assertIsNone(slc.upper) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-08-02-10-27-53.gh-issue-137308.at05p_.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-02-10-27-53.gh-issue-137308.at05p_.rst new file mode 100644 index 00000000000000..8003de422b2919 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-02-10-27-53.gh-issue-137308.at05p_.rst @@ -0,0 +1,3 @@ +A standalone docstring in a node body is optimized as a :keyword:`pass` +statement to ensure that the node's body is never empty. There was a +:exc:`ValueError` in :func:`compile` otherwise. diff --git a/Python/ast_preprocess.c b/Python/ast_preprocess.c index bafd67ed790b20..44d3075098be75 100644 --- a/Python/ast_preprocess.c +++ b/Python/ast_preprocess.c @@ -435,13 +435,38 @@ stmt_seq_remove_item(asdl_stmt_seq *stmts, Py_ssize_t idx) return 1; } +static int +remove_docstring(asdl_stmt_seq *stmts, Py_ssize_t idx, PyArena *ctx_) +{ + assert(_PyAST_GetDocString(stmts) != NULL); + // In case there's just the docstring in the body, replace it with `pass` + // keyword, so body won't be empty. + if (asdl_seq_LEN(stmts) == 1) { + stmt_ty docstring = (stmt_ty)asdl_seq_GET(stmts, 0); + stmt_ty pass = _PyAST_Pass( + docstring->lineno, docstring->col_offset, + // we know that `pass` always takes 4 chars and a single line, + // while docstring can span on multiple lines + docstring->lineno, docstring->col_offset + 4, + ctx_ + ); + if (pass == NULL) { + return 0; + } + asdl_seq_SET(stmts, 0, pass); + return 1; + } + // In case there are more than 1 body items, just remove the docstring. + return stmt_seq_remove_item(stmts, idx); +} + static int astfold_body(asdl_stmt_seq *stmts, PyArena *ctx_, _PyASTPreprocessState *state) { int docstring = _PyAST_GetDocString(stmts) != NULL; if (docstring && (state->optimize >= 2)) { /* remove the docstring */ - if (!stmt_seq_remove_item(stmts, 0)) { + if (!remove_docstring(stmts, 0, ctx_)) { return 0; } docstring = 0; diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 6466d2615cd14e..a3c38ecdea3e1f 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -5,7 +5,7 @@ import re from typing import Optional, Callable -from parser import Stmt, SimpleStmt, BlockStmt, IfStmt, WhileStmt +from parser import Stmt, SimpleStmt, BlockStmt, IfStmt, WhileStmt, ForStmt, MacroIfStmt @dataclass class EscapingCall: @@ -723,53 +723,57 @@ def visit(stmt: Stmt) -> None: if error is not None: raise analysis_error(f"Escaping call '{error.text} in condition", error) +def escaping_call_in_simple_stmt(stmt: SimpleStmt, result: dict[SimpleStmt, EscapingCall]) -> None: + tokens = stmt.contents + for idx, tkn in enumerate(tokens): + try: + next_tkn = tokens[idx+1] + except IndexError: + break + if next_tkn.kind != lexer.LPAREN: + continue + if tkn.kind == lexer.IDENTIFIER: + if tkn.text.upper() == tkn.text: + # simple macro + continue + #if not tkn.text.startswith(("Py", "_Py", "monitor")): + # continue + if tkn.text.startswith(("sym_", "optimize_", "PyJitRef")): + # Optimize functions + continue + if tkn.text.endswith("Check"): + continue + if tkn.text.startswith("Py_Is"): + continue + if tkn.text.endswith("CheckExact"): + continue + if tkn.text in NON_ESCAPING_FUNCTIONS: + continue + elif tkn.kind == "RPAREN": + prev = tokens[idx-1] + if prev.text.endswith("_t") or prev.text == "*" or prev.text == "int": + #cast + continue + elif tkn.kind != "RBRACKET": + continue + if tkn.text in ("PyStackRef_CLOSE", "PyStackRef_XCLOSE"): + if len(tokens) <= idx+2: + raise analysis_error("Unexpected end of file", next_tkn) + kills = tokens[idx+2] + if kills.kind != "IDENTIFIER": + raise analysis_error(f"Expected identifier, got '{kills.text}'", kills) + else: + kills = None + result[stmt] = EscapingCall(stmt, tkn, kills) + + def find_escaping_api_calls(instr: parser.CodeDef) -> dict[SimpleStmt, EscapingCall]: result: dict[SimpleStmt, EscapingCall] = {} def visit(stmt: Stmt) -> None: if not isinstance(stmt, SimpleStmt): return - tokens = stmt.contents - for idx, tkn in enumerate(tokens): - try: - next_tkn = tokens[idx+1] - except IndexError: - break - if next_tkn.kind != lexer.LPAREN: - continue - if tkn.kind == lexer.IDENTIFIER: - if tkn.text.upper() == tkn.text: - # simple macro - continue - #if not tkn.text.startswith(("Py", "_Py", "monitor")): - # continue - if tkn.text.startswith(("sym_", "optimize_", "PyJitRef")): - # Optimize functions - continue - if tkn.text.endswith("Check"): - continue - if tkn.text.startswith("Py_Is"): - continue - if tkn.text.endswith("CheckExact"): - continue - if tkn.text in NON_ESCAPING_FUNCTIONS: - continue - elif tkn.kind == "RPAREN": - prev = tokens[idx-1] - if prev.text.endswith("_t") or prev.text == "*" or prev.text == "int": - #cast - continue - elif tkn.kind != "RBRACKET": - continue - if tkn.text in ("PyStackRef_CLOSE", "PyStackRef_XCLOSE"): - if len(tokens) <= idx+2: - raise analysis_error("Unexpected end of file", next_tkn) - kills = tokens[idx+2] - if kills.kind != "IDENTIFIER": - raise analysis_error(f"Expected identifier, got '{kills.text}'", kills) - else: - kills = None - result[stmt] = EscapingCall(stmt, tkn, kills) + escaping_call_in_simple_stmt(stmt, result) instr.block.accept(visit) check_escaping_calls(instr, result) @@ -822,6 +826,60 @@ def stack_effect_only_peeks(instr: parser.InstDef) -> bool: ) +def stmt_is_simple_exit(stmt: Stmt) -> bool: + if not isinstance(stmt, SimpleStmt): + return False + tokens = stmt.contents + if len(tokens) < 4: + return False + return ( + tokens[0].text in ("ERROR_IF", "DEOPT_IF", "EXIT_IF") + and + tokens[1].text == "(" + and + tokens[2].text in ("true", "1") + and + tokens[3].text == ")" + ) + + +def stmt_list_escapes(stmts: list[Stmt]) -> bool: + if not stmts: + return False + if stmt_is_simple_exit(stmts[-1]): + return False + for stmt in stmts: + if stmt_escapes(stmt): + return True + return False + + +def stmt_escapes(stmt: Stmt) -> bool: + if isinstance(stmt, BlockStmt): + return stmt_list_escapes(stmt.body) + elif isinstance(stmt, SimpleStmt): + for tkn in stmt.contents: + if tkn.text == "DECREF_INPUTS": + return True + d: dict[SimpleStmt, EscapingCall] = {} + escaping_call_in_simple_stmt(stmt, d) + return bool(d) + elif isinstance(stmt, IfStmt): + if stmt.else_body and stmt_escapes(stmt.else_body): + return True + return stmt_escapes(stmt.body) + elif isinstance(stmt, MacroIfStmt): + if stmt.else_body and stmt_list_escapes(stmt.else_body): + return True + return stmt_list_escapes(stmt.body) + elif isinstance(stmt, ForStmt): + return stmt_escapes(stmt.body) + elif isinstance(stmt, WhileStmt): + return stmt_escapes(stmt.body) + else: + assert False, "Unexpected statement type" + + def compute_properties(op: parser.CodeDef) -> Properties: escaping_calls = find_escaping_api_calls(op) has_free = ( @@ -843,7 +901,7 @@ def compute_properties(op: parser.CodeDef) -> Properties: ) error_with_pop = has_error_with_pop(op) error_without_pop = has_error_without_pop(op) - escapes = bool(escaping_calls) or variable_used(op, "DECREF_INPUTS") + escapes = stmt_escapes(op.block) pure = False if isinstance(op, parser.LabelDef) else "pure" in op.annotations no_save_ip = False if isinstance(op, parser.LabelDef) else "no_save_ip" in op.annotations return Properties( diff --git a/Tools/i18n/msgfmt.py b/Tools/i18n/msgfmt.py index cd5f1ed9f3e268..65254a7c375456 100755 --- a/Tools/i18n/msgfmt.py +++ b/Tools/i18n/msgfmt.py @@ -5,8 +5,7 @@ This program converts a textual Uniforum-style message catalog (.po file) into a binary GNU catalog (.mo file). This is essentially the same function as the -GNU msgfmt program, however, it is a simpler implementation. Currently it -does not handle plural forms but it does handle message contexts. +GNU msgfmt program, however, it is a simpler implementation. Usage: msgfmt.py [OPTIONS] filename.po