From dfeb160bc35f0ba16800d07b85cb11598d1cd307 Mon Sep 17 00:00:00 2001 From: grayjk Date: Tue, 7 Apr 2026 09:05:23 -0400 Subject: [PATCH 1/5] gh-130273: Fix traceback color output with unicode characters (GH-142529) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Account for the display width of Unicode characters so that colors and underlining in traceback output is correct. Co-authored-by: Łukasz Langa Co-authored-by: Victor Stinner --- Lib/_pyrepl/utils.py | 1 + Lib/test/test_traceback.py | 87 +++++++++++++++++++ Lib/traceback.py | 70 +++++++++++---- ...-12-10-15-15-09.gh-issue-130273.iCfiY5.rst | 1 + 4 files changed, 144 insertions(+), 15 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-12-10-15-15-09.gh-issue-130273.iCfiY5.rst diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py index 7175d57a9e319e..d399b4cf53c82a 100644 --- a/Lib/_pyrepl/utils.py +++ b/Lib/_pyrepl/utils.py @@ -16,6 +16,7 @@ from .types import CharBuffer, CharWidths from .trace import trace + ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]") ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02") ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""}) diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index 5dc11253e0d5c8..909808825f055e 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -1790,6 +1790,7 @@ def f(): ] self.assertEqual(result_lines, expected) + class TestKeywordTypoSuggestions(unittest.TestCase): TYPO_CASES = [ ("with block ad something:\n pass", "and"), @@ -5414,6 +5415,92 @@ def expected(t, m, fn, l, f, E, e, z, n): ] self.assertEqual(actual, expected(**colors)) + def test_colorized_traceback_unicode(self): + try: + 啊哈=1; 啊哈/0#### + except Exception as e: + exc = traceback.TracebackException.from_exception(e) + + actual = "".join(exc.format(colorize=True)).splitlines() + def expected(t, m, fn, l, f, E, e, z, n): + return [ + f" 啊哈=1; {e}啊哈{z}{E}/{z}{e}0{z}####", + f" {e}~~~~{z}{E}^{z}{e}~{z}", + ] + self.assertEqual(actual[2:4], expected(**colors)) + + try: + ééééé/0 + except Exception as e: + exc = traceback.TracebackException.from_exception(e) + + actual = "".join(exc.format(colorize=True)).splitlines() + def expected(t, m, fn, l, f, E, e, z, n): + return [ + f" {E}ééééé{z}/0", + f" {E}^^^^^{z}", + ] + self.assertEqual(actual[2:4], expected(**colors)) + + def test_colorized_syntax_error_ascii_display_width(self): + """Caret alignment for ASCII edge cases handled by _wlen. + + The old ASCII fast track in _display_width returned the raw character + offset for ASCII strings, which is wrong for CTRL-Z (display width 2) + and ANSI escape sequences (display width 0). + """ + E = colors["E"] + z = colors["z"] + t = colors["t"] + m = colors["m"] + fn = colors["fn"] + l = colors["l"] + + def _make_syntax_error(text, offset, end_offset): + err = SyntaxError("invalid syntax") + err.filename = "" + err.lineno = 1 + err.end_lineno = 1 + err.text = text + err.offset = offset + err.end_offset = end_offset + return err + + # CTRL-Z (\x1a) is ASCII but displayed as ^Z (2 columns). + # Verify caret aligns when CTRL-Z precedes the error. + err = _make_syntax_error("a\x1a$\n", offset=3, end_offset=4) + exc = traceback.TracebackException.from_exception(err) + actual = "".join(exc.format(colorize=True)) + # 'a' (1 col) + '\x1a' (2 cols) = 3 cols before '$' + self.assertIn( + f' File {fn}""{z}, line {l}1{z}\n' + f' a\x1a{E}${z}\n' + f' {" " * 3}{E}^{z}\n' + f'{t}SyntaxError{z}: {m}invalid syntax{z}\n', + actual, + ) + + # CTRL-Z in the highlighted (error) region counts as 2 columns. + err = _make_syntax_error("$\x1a\n", offset=1, end_offset=3) + exc = traceback.TracebackException.from_exception(err) + actual = "".join(exc.format(colorize=True)) + # '$' (1 col) + '\x1a' (2 cols) = 3 columns of carets + self.assertIn( + f' {E}$\x1a{z}\n' + f' {E}{"^" * 3}{z}\n', + actual, + ) + + # ANSI escape sequences are ASCII but take 0 display columns. + err = _make_syntax_error("a\x1b[1mb$\n", offset=7, end_offset=8) + exc = traceback.TracebackException.from_exception(err) + actual = "".join(exc.format(colorize=True)) + # 'a' (1 col) + '\x1b[1m' (0 cols) + 'b' (1 col) = 2 before '$' + self.assertIn( + f' a\x1b[1mb{E}${z}\n' + f' {" " * 2}{E}^{z}\n', + actual, + ) class TestLazyImportSuggestions(unittest.TestCase): """Test that lazy imports are not reified when computing AttributeError suggestions.""" diff --git a/Lib/traceback.py b/Lib/traceback.py index 1f9f151ebf5d39..343d0e5f108c35 100644 --- a/Lib/traceback.py +++ b/Lib/traceback.py @@ -1,9 +1,11 @@ """Extract, format and print information about Python stack traces.""" import collections.abc +import functools import itertools import linecache import os +import re import sys import textwrap import types @@ -684,12 +686,12 @@ def output_line(lineno): colorized_line_parts = [] colorized_carets_parts = [] - for color, group in itertools.groupby(itertools.zip_longest(line, carets, fillvalue=""), key=lambda x: x[1]): + for color, group in itertools.groupby(_zip_display_width(line, carets), key=lambda x: x[1]): caret_group = list(group) - if color == "^": + if "^" in color: colorized_line_parts.append(theme.error_highlight + "".join(char for char, _ in caret_group) + theme.reset) colorized_carets_parts.append(theme.error_highlight + "".join(caret for _, caret in caret_group) + theme.reset) - elif color == "~": + elif "~" in color: colorized_line_parts.append(theme.error_range + "".join(char for char, _ in caret_group) + theme.reset) colorized_carets_parts.append(theme.error_range + "".join(caret for _, caret in caret_group) + theme.reset) else: @@ -971,7 +973,54 @@ def setup_positions(expr, force_valid=True): return None -_WIDE_CHAR_SPECIFIERS = "WF" + +def _zip_display_width(line, carets): + carets = iter(carets) + if line.isascii() and '\x1a' not in line: + for char in line: + yield char, next(carets, "") + return + + import unicodedata + for char in unicodedata.iter_graphemes(line): + char = str(char) + char_width = _display_width(char) + yield char, "".join(itertools.islice(carets, char_width)) + + +@functools.cache +def _str_width(c: str) -> int: + # copied from _pyrepl.utils to fix gh-130273 + + if ord(c) < 128: + return 1 + import unicodedata + # gh-139246 for zero-width joiner and combining characters + if unicodedata.combining(c): + return 0 + category = unicodedata.category(c) + if category == "Cf" and c != "\u00ad": + return 0 + w = unicodedata.east_asian_width(c) + if w in ("N", "Na", "H", "A"): + return 1 + return 2 + + +_ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]") + + +def _wlen(s: str) -> int: + # copied from _pyrepl.utils to fix gh-130273 + + if len(s) == 1 and s != "\x1a": + return _str_width(s) + length = sum(_str_width(i) for i in s) + # remove lengths of any escape sequences + sequence = _ANSI_ESCAPE_SEQUENCE.findall(s) + ctrl_z_cnt = s.count("\x1a") + return length - sum(len(i) for i in sequence) + ctrl_z_cnt + def _display_width(line, offset=None): """Calculate the extra amount of width space the given source @@ -979,18 +1028,9 @@ def _display_width(line, offset=None): width output device. Supports wide unicode characters and emojis.""" if offset is None: - offset = len(line) - - # Fast track for ASCII-only strings - if line.isascii(): - return offset + return _wlen(line) - import unicodedata - - return sum( - 2 if unicodedata.east_asian_width(char) in _WIDE_CHAR_SPECIFIERS else 1 - for char in line[:offset] - ) + return _wlen(line[:offset]) def _format_note(note, indent, theme): diff --git a/Misc/NEWS.d/next/Library/2025-12-10-15-15-09.gh-issue-130273.iCfiY5.rst b/Misc/NEWS.d/next/Library/2025-12-10-15-15-09.gh-issue-130273.iCfiY5.rst new file mode 100644 index 00000000000000..2e0695334fd71e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-12-10-15-15-09.gh-issue-130273.iCfiY5.rst @@ -0,0 +1 @@ +Fix traceback color output with Unicode characters. From e65987d4c0d63585dd63e742ec3c8aa07645ffc5 Mon Sep 17 00:00:00 2001 From: Jonathan Dung Date: Tue, 7 Apr 2026 21:26:53 +0800 Subject: [PATCH 2/5] gh-146547: Use lazy imports in ctypes (GH-146548) --- Lib/ctypes/__init__.py | 3 ++- Lib/ctypes/_layout.py | 3 ++- Lib/ctypes/util.py | 5 +++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/Lib/ctypes/__init__.py b/Lib/ctypes/__init__.py index 1c822759eca912..6d7cb56f6c354e 100644 --- a/Lib/ctypes/__init__.py +++ b/Lib/ctypes/__init__.py @@ -2,9 +2,10 @@ import os as _os import sys as _sys -import sysconfig as _sysconfig import types as _types +lazy import sysconfig as _sysconfig + from _ctypes import Union, Structure, Array from _ctypes import _Pointer from _ctypes import CFuncPtr as _CFuncPtr diff --git a/Lib/ctypes/_layout.py b/Lib/ctypes/_layout.py index 2048ccb6a1c93f..61b4c6c70fb875 100644 --- a/Lib/ctypes/_layout.py +++ b/Lib/ctypes/_layout.py @@ -5,11 +5,12 @@ """ import sys -import warnings from _ctypes import CField, buffer_info import ctypes +lazy import warnings + def round_down(n, multiple): assert n >= 0 assert multiple > 0 diff --git a/Lib/ctypes/util.py b/Lib/ctypes/util.py index 3b21658433b2ed..35ac5b6bfd6a37 100644 --- a/Lib/ctypes/util.py +++ b/Lib/ctypes/util.py @@ -1,8 +1,9 @@ import os -import shutil -import subprocess import sys +lazy import shutil +lazy import subprocess + # find_library(name) returns the pathname of a library, or None. if os.name == "nt": From feee573f36ecdc767d73c95d43e68dd02e41549d Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 7 Apr 2026 16:05:39 +0200 Subject: [PATCH 3/5] gh-148014: Accept a function name in -X presite option (#148015) --- Doc/c-api/init_config.rst | 6 +- Doc/using/cmdline.rst | 11 +- Lib/test/_test_embed_structseq.py | 29 +++-- Lib/test/cov.py | 8 +- Lib/test/libregrtest/runtests.py | 2 +- Lib/test/support/__init__.py | 2 +- Lib/test/test_cmd_line.py | 22 ++++ Lib/test/test_embed.py | 2 +- ...-04-02-17-52-33.gh-issue-148014.2Y6ND_.rst | 2 + Python/pylifecycle.c | 118 ++++++++++++++++-- 10 files changed, 166 insertions(+), 36 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-04-02-17-52-33.gh-issue-148014.2Y6ND_.rst diff --git a/Doc/c-api/init_config.rst b/Doc/c-api/init_config.rst index f6dc604a609cb1..209e48767ccfd6 100644 --- a/Doc/c-api/init_config.rst +++ b/Doc/c-api/init_config.rst @@ -1807,10 +1807,10 @@ PyConfig .. c:member:: wchar_t* run_presite - ``package.module`` path to module that should be imported before - ``site.py`` is run. + ``module`` or ``module:func`` entry point that should be executed before + the :mod:`site` module is imported. - Set by the :option:`-X presite=package.module <-X>` command-line + Set by the :option:`-X presite=module:func <-X>` command-line option and the :envvar:`PYTHON_PRESITE` environment variable. The command-line option takes precedence. diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index d0355ce47a6504..7cbc03f5f1281e 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -654,13 +654,17 @@ Miscellaneous options .. versionadded:: 3.13 - * :samp:`-X presite={package.module}` specifies a module that should be - imported before the :mod:`site` module is executed and before the + * :samp:`-X presite={module}` or :samp:`-X presite={module:func}` specifies + an entry point that should be executed before the :mod:`site` module is + executed and before the :mod:`__main__` module exists. Therefore, the imported module isn't :mod:`__main__`. This can be used to execute code early during Python initialization. Python needs to be :ref:`built in debug mode ` for this option to exist. See also :envvar:`PYTHON_PRESITE`. + .. versionchanged:: next + Accept also ``module:func`` entry point format. + .. versionadded:: 3.13 * :samp:`-X gil={0,1}` forces the GIL to be disabled or enabled, @@ -1458,4 +1462,7 @@ Debug-mode variables Needs Python configured with the :option:`--with-pydebug` build option. + .. versionchanged:: next + Accept also ``module:func`` entry point format. + .. versionadded:: 3.13 diff --git a/Lib/test/_test_embed_structseq.py b/Lib/test/_test_embed_structseq.py index 4cac84d7a469ac..c6050ca62aafca 100644 --- a/Lib/test/_test_embed_structseq.py +++ b/Lib/test/_test_embed_structseq.py @@ -47,16 +47,21 @@ def test_sys_funcs(self): self.check_structseq(type(obj)) -try: - unittest.main( - module=( - '__main__' - if __name__ == '__main__' - # Avoiding a circular import: - else sys.modules['test._test_embed_structseq'] +def main(): + try: + unittest.main( + module=( + '__main__' + if __name__ == '__main__' + # Avoiding a circular import: + else sys.modules['test._test_embed_structseq'] + ) ) - ) -except SystemExit as exc: - if exc.args[0] != 0: - raise -print("Tests passed") + except SystemExit as exc: + if exc.args[0] != 0: + raise + print("Tests passed") + + +if __name__ == "__main__": + main() diff --git a/Lib/test/cov.py b/Lib/test/cov.py index e4699c7afe174a..8717b1f20dd979 100644 --- a/Lib/test/cov.py +++ b/Lib/test/cov.py @@ -1,8 +1,7 @@ """A minimal hook for gathering line coverage of the standard library. -Designed to be used with -Xpresite= which means: -* it installs itself on import -* it's not imported as `__main__` so can't use the ifmain idiom +Designed to be used with -Xpresite=test.cov:enable which means: + * it can't import anything besides `sys` to avoid tainting gathered coverage * filenames are not normalized @@ -45,4 +44,5 @@ def disable(): mon.free_tool_id(mon.COVERAGE_ID) -enable() +if __name__ == "__main__": + enable() diff --git a/Lib/test/libregrtest/runtests.py b/Lib/test/libregrtest/runtests.py index e6d34d8e6a3be5..0a9edce1085be5 100644 --- a/Lib/test/libregrtest/runtests.py +++ b/Lib/test/libregrtest/runtests.py @@ -159,7 +159,7 @@ def create_python_cmd(self) -> list[str]: if '-u' not in python_opts: cmd.append('-u') # Unbuffered stdout and stderr if self.coverage: - cmd.append("-Xpresite=test.cov") + cmd.append("-Xpresite=test.cov:enable") return cmd def bisect_cmd_args(self) -> list[str]: diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 8ff061e074074f..2cac70f4ab2afb 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -1397,7 +1397,7 @@ def trace_wrapper(*args, **kwargs): sys.settrace(original_trace) coverage_wrapper = trace_wrapper - if 'test.cov' in sys.modules: # -Xpresite=test.cov used + if 'test.cov' in sys.modules: # -Xpresite=test.cov:enable used cov = sys.monitoring.COVERAGE_ID @functools.wraps(func) def coverage_wrapper(*args, **kwargs): diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index 5f035c35367d64..8740f65b7b0d1d 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -32,6 +32,17 @@ def _kill_python_and_exit_code(p): return data, returncode +def presite_func(): + print("presite func") + +class Namespace: + pass + +presite = Namespace() +presite.attr = Namespace() +presite.attr.func = presite_func + + class CmdLineTest(unittest.TestCase): def test_directories(self): assert_python_failure('.') @@ -1266,6 +1277,17 @@ def test_invalid_thread_local_bytecode(self): rc, out, err = assert_python_failure(PYTHON_TLBC="2") self.assertIn(b"PYTHON_TLBC=N: N is missing or invalid", err) + @unittest.skipUnless(support.Py_DEBUG, + '-X presite requires a Python debug build') + def test_presite(self): + entrypoint = "test.test_cmd_line:presite_func" + proc = assert_python_ok("-X", f"presite={entrypoint}", "-c", "pass") + self.assertEqual(proc.out.rstrip(), b"presite func") + + entrypoint = "test.test_cmd_line:presite.attr.func" + proc = assert_python_ok("-X", f"presite={entrypoint}", "-c", "pass") + self.assertEqual(proc.out.rstrip(), b"presite func") + @unittest.skipIf(interpreter_requires_environment(), 'Cannot run -I tests when PYTHON env vars are required.') diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index 45d0d8308dbdea..a2de58c2926456 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -2051,7 +2051,7 @@ def test_no_memleak(self): def test_presite(self): cmd = [ sys.executable, - "-I", "-X", "presite=test._test_embed_structseq", + "-I", "-X", "presite=test._test_embed_structseq:main", "-c", "print('unique-python-message')", ] proc = subprocess.run( diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-02-17-52-33.gh-issue-148014.2Y6ND_.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-02-17-52-33.gh-issue-148014.2Y6ND_.rst new file mode 100644 index 00000000000000..964e5bdabbe1f0 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-02-17-52-33.gh-issue-148014.2Y6ND_.rst @@ -0,0 +1,2 @@ +Accept a function name in :option:`-X presite <-X>` command line option and +:envvar:`PYTHON_PRESITE` environment variable. Patch by Victor Stinner. diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 5da0f3e5be3a70..8be9e6d7373826 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1218,6 +1218,54 @@ pyinit_main_reconfigure(PyThreadState *tstate) #ifdef Py_DEBUG +// Equivalent to the Python code: +// +// for part in attr.split('.'): +// obj = getattr(obj, part) +static PyObject* +presite_resolve_name(PyObject *obj, PyObject *attr) +{ + obj = Py_NewRef(obj); + attr = Py_NewRef(attr); + PyObject *res; + + while (1) { + Py_ssize_t len = PyUnicode_GET_LENGTH(attr); + Py_ssize_t pos = PyUnicode_FindChar(attr, '.', 0, len, 1); + if (pos < 0) { + break; + } + + PyObject *name = PyUnicode_Substring(attr, 0, pos); + if (name == NULL) { + goto error; + } + res = PyObject_GetAttr(obj, name); + Py_DECREF(name); + if (res == NULL) { + goto error; + } + Py_SETREF(obj, res); + + PyObject *suffix = PyUnicode_Substring(attr, pos + 1, len); + if (suffix == NULL) { + goto error; + } + Py_SETREF(attr, suffix); + } + + res = PyObject_GetAttr(obj, attr); + Py_DECREF(obj); + Py_DECREF(attr); + return res; + +error: + Py_DECREF(obj); + Py_DECREF(attr); + return NULL; +} + + static void run_presite(PyThreadState *tstate) { @@ -1228,22 +1276,68 @@ run_presite(PyThreadState *tstate) return; } - PyObject *presite_modname = PyUnicode_FromWideChar( - config->run_presite, - wcslen(config->run_presite) - ); - if (presite_modname == NULL) { - fprintf(stderr, "Could not convert pre-site module name to unicode\n"); + PyObject *presite = PyUnicode_FromWideChar(config->run_presite, -1); + if (presite == NULL) { + fprintf(stderr, "Could not convert pre-site command to Unicode\n"); + _PyErr_Print(tstate); + return; + } + + // Accept "mod_name" and "mod_name:func_name" entry point syntax + Py_ssize_t len = PyUnicode_GET_LENGTH(presite); + Py_ssize_t pos = PyUnicode_FindChar(presite, ':', 0, len, 1); + PyObject *mod_name = NULL; + PyObject *func_name = NULL; + PyObject *module = NULL; + if (pos > 0) { + mod_name = PyUnicode_Substring(presite, 0, pos); + if (mod_name == NULL) { + goto error; + } + + func_name = PyUnicode_Substring(presite, pos + 1, len); + if (func_name == NULL) { + goto error; + } } else { - PyObject *presite = PyImport_Import(presite_modname); - if (presite == NULL) { - fprintf(stderr, "pre-site import failed:\n"); - _PyErr_Print(tstate); + mod_name = Py_NewRef(presite); + } + + // mod_name can contain dots (ex: "math.integer") + module = PyImport_Import(mod_name); + if (module == NULL) { + goto error; + } + + if (func_name != NULL) { + PyObject *func = presite_resolve_name(module, func_name); + if (func == NULL) { + goto error; + } + + PyObject *res = PyObject_CallNoArgs(func); + Py_DECREF(func); + if (res == NULL) { + goto error; } - Py_XDECREF(presite); - Py_DECREF(presite_modname); + Py_DECREF(res); } + + Py_DECREF(presite); + Py_DECREF(mod_name); + Py_XDECREF(func_name); + Py_DECREF(module); + return; + +error: + fprintf(stderr, "pre-site failed:\n"); + _PyErr_Print(tstate); + + Py_DECREF(presite); + Py_XDECREF(mod_name); + Py_XDECREF(func_name); + Py_XDECREF(module); } #endif From 7e0a0be4097f9d29d66fe23f5af86f18a34ed7dd Mon Sep 17 00:00:00 2001 From: Joshua Swanson <22283299+joshuaswanson@users.noreply.github.com> Date: Tue, 7 Apr 2026 16:10:34 +0200 Subject: [PATCH 4/5] gh-146333: Fix quadratic regex backtracking in configparser option parsing (GH-146399) Use negative lookahead in option regex to prevent backtracking, and to avoid changing logic outside the regexes (since people could use the regex directly). --- Lib/configparser.py | 8 ++++++-- Lib/test/test_configparser.py | 20 +++++++++++++++++++ ...3-25-00-51-03.gh-issue-146333.LqdL__bn.rst | 3 +++ 3 files changed, 29 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2026-03-25-00-51-03.gh-issue-146333.LqdL__bn.rst diff --git a/Lib/configparser.py b/Lib/configparser.py index d435a5c2fe0da2..e76647d339e913 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -613,7 +613,9 @@ class RawConfigParser(MutableMapping): \] # ] """ _OPT_TMPL = r""" - (?P