From 04f8ef663be7589def9f456a2024e1707e3408ea Mon Sep 17 00:00:00 2001 From: adam j hartz Date: Thu, 14 Aug 2025 13:58:11 -0400 Subject: [PATCH 1/4] gh-137576: Fix for Basic REPL showing incorrect code in tracebacks with `PYTHONSTARTUP` (#137625) Co-authored-by: Kirill Podoprigora --- Lib/test/test_repl.py | 62 +++++++++++++++++++ Lib/traceback.py | 2 +- ...-08-10-21-34-12.gh-issue-137576.0ZicS-.rst | 2 + Python/pythonrun.c | 27 +++++++- 4 files changed, 90 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-08-10-21-34-12.gh-issue-137576.0ZicS-.rst diff --git a/Lib/test/test_repl.py b/Lib/test/test_repl.py index f4a4634fc62f8a..54e69277282c30 100644 --- a/Lib/test/test_repl.py +++ b/Lib/test/test_repl.py @@ -188,6 +188,68 @@ def foo(x): ] self.assertEqual(traceback_lines, expected_lines) + def test_pythonstartup_error_reporting(self): + # errors based on https://github.com/python/cpython/issues/137576 + + def make_repl(env): + return subprocess.Popen( + [os.path.join(os.path.dirname(sys.executable), ''), "-i"], + executable=sys.executable, + text=True, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + env=env, + ) + + # case 1: error in user input, but PYTHONSTARTUP is fine + with os_helper.temp_dir() as tmpdir: + script = os.path.join(tmpdir, "pythonstartup.py") + with open(script, "w") as f: + f.write("print('from pythonstartup')" + os.linesep) + + env = os.environ.copy() + env['PYTHONSTARTUP'] = script + env["PYTHON_HISTORY"] = os.path.join(tmpdir, ".pythonhist") + p = make_repl(env) + p.stdin.write("1/0") + output = kill_python(p) + expected = dedent(""" + Traceback (most recent call last): + File "", line 1, in + 1/0 + ~^~ + ZeroDivisionError: division by zero + """) + self.assertIn("from pythonstartup", output) + self.assertIn(expected, output) + + # case 2: error in PYTHONSTARTUP triggered by user input + with os_helper.temp_dir() as tmpdir: + script = os.path.join(tmpdir, "pythonstartup.py") + with open(script, "w") as f: + f.write("def foo():\n 1/0\n") + + env = os.environ.copy() + env['PYTHONSTARTUP'] = script + env["PYTHON_HISTORY"] = os.path.join(tmpdir, ".pythonhist") + p = make_repl(env) + p.stdin.write('foo()') + output = kill_python(p) + expected = dedent(""" + Traceback (most recent call last): + File "", line 1, in + foo() + ~~~^^ + File "%s", line 2, in foo + 1/0 + ~^~ + ZeroDivisionError: division by zero + """) % script + self.assertIn(expected, output) + + + def test_runsource_show_syntax_error_location(self): user_input = dedent("""def f(x, x): ... """) diff --git a/Lib/traceback.py b/Lib/traceback.py index 318ec13cf91121..1fe295add3a6dd 100644 --- a/Lib/traceback.py +++ b/Lib/traceback.py @@ -541,7 +541,7 @@ def format_frame_summary(self, frame_summary, **kwargs): colorize = kwargs.get("colorize", False) row = [] filename = frame_summary.filename - if frame_summary.filename.startswith("-"): + if frame_summary.filename.startswith("'): filename = "" if colorize: theme = _colorize.get_theme(force_color=True).traceback diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-08-10-21-34-12.gh-issue-137576.0ZicS-.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-10-21-34-12.gh-issue-137576.0ZicS-.rst new file mode 100644 index 00000000000000..19e0f3bf10e04b --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-10-21-34-12.gh-issue-137576.0ZicS-.rst @@ -0,0 +1,2 @@ +Fix for incorrect source code being shown in tracebacks from the Basic REPL +when :envvar:`PYTHONSTARTUP` is given. Patch by Adam Hartz. diff --git a/Python/pythonrun.c b/Python/pythonrun.c index 8f1c78bf831863..45211e1b075042 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -1365,6 +1365,29 @@ run_eval_code_obj(PyThreadState *tstate, PyCodeObject *co, PyObject *globals, Py return PyEval_EvalCode((PyObject*)co, globals, locals); } +static PyObject * +get_interactive_filename(PyObject *filename, Py_ssize_t count) +{ + PyObject *result; + Py_ssize_t len = PyUnicode_GET_LENGTH(filename); + + if (len >= 2 + && PyUnicode_ReadChar(filename, 0) == '<' + && PyUnicode_ReadChar(filename, len - 1) == '>') { + PyObject *middle = PyUnicode_Substring(filename, 1, len-1); + if (middle == NULL) { + return NULL; + } + result = PyUnicode_FromFormat("<%U-%d>", middle, count); + Py_DECREF(middle); + } else { + result = PyUnicode_FromFormat( + "%U-%d", filename, count); + } + return result; + +} + static PyObject * run_mod(mod_ty mod, PyObject *filename, PyObject *globals, PyObject *locals, PyCompilerFlags *flags, PyArena *arena, PyObject* interactive_src, @@ -1375,8 +1398,8 @@ run_mod(mod_ty mod, PyObject *filename, PyObject *globals, PyObject *locals, if (interactive_src) { PyInterpreterState *interp = tstate->interp; if (generate_new_source) { - interactive_filename = PyUnicode_FromFormat( - "%U-%d", filename, interp->_interactive_src_count++); + interactive_filename = get_interactive_filename( + filename, interp->_interactive_src_count++); } else { Py_INCREF(interactive_filename); } From 0cbbfc462119b9107b373c24d2bda5a1271bed36 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 14 Aug 2025 21:13:22 +0300 Subject: [PATCH 2/4] gh-135661: Fix CDATA section parsing in HTMLParser (GH-135665) "] ]>" and "]] >" no longer end the CDATA section. Make CDATA section parsing context depending. Add private method HTMLParser._set_support_cdata() to change the context. If called with True, "<[CDATA[" starts a CDATA section which ends with "]]>". If called with False, "<[CDATA[" starts a bogus comments which ends with ">". --- Lib/html/parser.py | 28 +++++- Lib/test/test_htmlparser.py | 86 ++++++++++++++----- ...-06-18-13-34-55.gh-issue-135661.NZlpWf.rst | 5 ++ 3 files changed, 94 insertions(+), 25 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2025-06-18-13-34-55.gh-issue-135661.NZlpWf.rst diff --git a/Lib/html/parser.py b/Lib/html/parser.py index 5d03c98df5cdd0..75bf8adae6d70a 100644 --- a/Lib/html/parser.py +++ b/Lib/html/parser.py @@ -146,6 +146,7 @@ def reset(self): self.lasttag = '???' self.interesting = interesting_normal self.cdata_elem = None + self._support_cdata = True self._escapable = True super().reset() @@ -183,6 +184,19 @@ def clear_cdata_mode(self): self.cdata_elem = None self._escapable = True + def _set_support_cdata(self, flag=True): + """Enable or disable support of the CDATA sections. + If enabled, "<[CDATA[" starts a CDATA section which ends with "]]>". + If disabled, "<[CDATA[" starts a bogus comments which ends with ">". + + This method is not called by default. Its purpose is to be called + in custom handle_starttag() and handle_endtag() methods, with + value that depends on the adjusted current node. + See https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state + for details. + """ + self._support_cdata = flag + # Internal -- handle data as far as reasonable. May leave state # and data to be processed by a subsequent call. If 'end' is # true, force handling all data as if followed by EOF marker. @@ -258,7 +272,10 @@ def goahead(self, end): break self.handle_comment(rawdata[i+4:j]) elif startswith("', i+9) + if j < 0: + return -1 + self.unknown_decl(rawdata[i+3: j]) + return j + 3 + else: + return self.parse_bogus_comment(i) elif rawdata[i:i+9].lower() == ' gtpos = rawdata.find('>', i+9) diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index 380bbe40177ec5..fff41dab321acd 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -10,10 +10,13 @@ class EventCollector(html.parser.HTMLParser): - def __init__(self, *args, **kw): + def __init__(self, *args, autocdata=False, **kw): + self.autocdata = autocdata self.events = [] self.append = self.events.append html.parser.HTMLParser.__init__(self, *args, **kw) + if autocdata: + self._set_support_cdata(False) def get_events(self): # Normalize the list of events so that buffer artefacts don't @@ -34,12 +37,16 @@ def get_events(self): def handle_starttag(self, tag, attrs): self.append(("starttag", tag, attrs)) + if self.autocdata and tag == 'svg': + self._set_support_cdata(True) def handle_startendtag(self, tag, attrs): self.append(("startendtag", tag, attrs)) def handle_endtag(self, tag): self.append(("endtag", tag)) + if self.autocdata and tag == 'svg': + self._set_support_cdata(False) # all other markup @@ -767,10 +774,6 @@ def test_eof_in_declarations(self): ('' '' @@ -845,28 +860,53 @@ def test_broken_condcoms(self): ] self._run_check(html, expected) - def test_cdata_declarations(self): - # More tests should be added. See also "8.2.4.42. Markup - # declaration open state", "8.2.4.69. CDATA section state", - # and issue 32876 - html = ('') - expected = [('unknown decl', 'CDATA[just some plain text')] + @support.subTests('content', [ + 'just some plain text', + '', + '¬-an-entity-ref;', + "", + '', + '[[I have many brackets]]', + 'I have a > in the middle', + 'I have a ]] in the middle', + '] ]>', + ']] >', + ('\n' + ' if (a < b && a > b) {\n' + ' printf("[How?]");\n' + ' }\n'), + ]) + def test_cdata_section_content(self, content): + # See "13.2.5.42 Markup declaration open state", + # "13.2.5.69 CDATA section state", and issue bpo-32876. + html = f'{content}' + expected = [ + ('starttag', 'svg', []), + ('starttag', 'text', [('y', '100')]), + ('unknown decl', 'CDATA[' + content), + ('endtag', 'text'), + ('endtag', 'svg'), + ] self._run_check(html, expected) + self._run_check(html, expected, collector=EventCollector(autocdata=True)) - def test_cdata_declarations_multiline(self): - html = (' b) {' - ' printf("[How?]");' - ' }' - ']]>') + def test_cdata_section(self): + # See "13.2.5.42 Markup declaration open state". + html = ('bar]]>' + 'foo<br>bar' + 'bar]]>') expected = [ - ('starttag', 'code', []), - ('unknown decl', - 'CDATA[ if (a < b && a > b) { ' - 'printf("[How?]"); }'), - ('endtag', 'code') + ('comment', '[CDATA[foo'), + ('starttag', 'svg', []), + ('starttag', 'text', [('y', '100')]), + ('unknown decl', 'CDATA[foo
bar'), + ('endtag', 'text'), + ('endtag', 'svg'), + ('comment', '[CDATA[foo'), ] - self._run_check(html, expected) + self._run_check(html, expected, collector=EventCollector(autocdata=True)) def test_convert_charrefs_dropped_text(self): # #23144: make sure that all the events are triggered when diff --git a/Misc/NEWS.d/next/Security/2025-06-18-13-34-55.gh-issue-135661.NZlpWf.rst b/Misc/NEWS.d/next/Security/2025-06-18-13-34-55.gh-issue-135661.NZlpWf.rst new file mode 100644 index 00000000000000..fe000d936aae9d --- /dev/null +++ b/Misc/NEWS.d/next/Security/2025-06-18-13-34-55.gh-issue-135661.NZlpWf.rst @@ -0,0 +1,5 @@ +Fix CDATA section parsing in :class:`html.parser.HTMLParser` according to +the HTML5 standard: ``] ]>`` and ``]] >`` no longer end the CDATA section. +Add private method ``_set_support_cdata()`` which can be used to specify +how to parse ``<[CDATA[`` --- as a CDATA section in foreign content +(SVG or MathML) or as a bogus comment in the HTML namespace. From 781294019db1247e6197d658cfcbc35c8c0ae25f Mon Sep 17 00:00:00 2001 From: Justin Applegate <70449145+Legoclones@users.noreply.github.com> Date: Thu, 14 Aug 2025 13:22:37 -0600 Subject: [PATCH 3/4] gh-135241: Make unpickling of booleans in protocol 0 more strict (GH-135242) The Python pickle module looks for "00" and "01" but _pickle only looked for 2 characters that parsed to 0 or 1, meaning some payloads like "+0" or " 0" would lead to different results in different implementations. --- Lib/test/pickletester.py | 10 ++++++++++ .../2025-06-08-01-10-34.gh-issue-135241.5j18IW.rst | 3 +++ Modules/_pickle.c | 2 +- 3 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-08-01-10-34.gh-issue-135241.5j18IW.rst diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py index 9a3a26a8400844..1a7658b13fa5e3 100644 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -1012,6 +1012,16 @@ def test_constants(self): self.assertIs(self.loads(b'I01\n.'), True) self.assertIs(self.loads(b'I00\n.'), False) + def test_issue135241(self): + # C implementation should check for hardcoded values 00 and 01 + # when getting booleans from the INT opcode. Doing a str comparison + # to bypass truthy/falsy comparisons. These payloads should return + # 0, not False. + out1 = self.loads(b'I+0\n.') + self.assertEqual(str(out1), '0') + out2 = self.loads(b'I 0\n.') + self.assertEqual(str(out2), '0') + def test_zero_padded_integers(self): self.assertEqual(self.loads(b'I010\n.'), 10) self.assertEqual(self.loads(b'I-010\n.'), -10) diff --git a/Misc/NEWS.d/next/Library/2025-06-08-01-10-34.gh-issue-135241.5j18IW.rst b/Misc/NEWS.d/next/Library/2025-06-08-01-10-34.gh-issue-135241.5j18IW.rst new file mode 100644 index 00000000000000..058ef11083e782 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-08-01-10-34.gh-issue-135241.5j18IW.rst @@ -0,0 +1,3 @@ +The :code:`INT` opcode of the C accelerator :mod:`!_pickle` module was updated +to look only for "00" and "01" to push booleans onto the stack, aligning with +the Python :mod:`pickle` module. diff --git a/Modules/_pickle.c b/Modules/_pickle.c index cf3ceb43fb3f3f..bc06478799345a 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -5255,7 +5255,7 @@ load_int(PickleState *state, UnpicklerObject *self) } } else { - if (len == 3 && (x == 0 || x == 1)) { + if (len == 3 && s[0] == '0' && (s[1] == '0' || s[1] == '1')) { if ((value = PyBool_FromLong(x)) == NULL) return -1; } From 8665769614183263a4370b35a4fdbe852e651e17 Mon Sep 17 00:00:00 2001 From: ROHAN WINSOR <61073765+rohanwinsor@users.noreply.github.com> Date: Fri, 15 Aug 2025 02:56:02 +0530 Subject: [PATCH 4/4] gh-137716: Fix double period in AttributeError message for invalid mock assertions (#137717) --- Lib/unittest/mock.py | 2 +- .../2025-08-13-16-58-58.gh-issue-137716.ZcZSyi.rst | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-08-13-16-58-58.gh-issue-137716.ZcZSyi.rst diff --git a/Lib/unittest/mock.py b/Lib/unittest/mock.py index e1dbfdacf56337..0bb6750655380d 100644 --- a/Lib/unittest/mock.py +++ b/Lib/unittest/mock.py @@ -700,7 +700,7 @@ def __getattr__(self, name): if name.startswith(('assert', 'assret', 'asert', 'aseert', 'assrt')) or name in _ATTRIB_DENY_LIST: raise AttributeError( f"{name!r} is not a valid assertion. Use a spec " - f"for the mock if {name!r} is meant to be an attribute.") + f"for the mock if {name!r} is meant to be an attribute") with NonCallableMock._lock: result = self._mock_children.get(name) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-08-13-16-58-58.gh-issue-137716.ZcZSyi.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-13-16-58-58.gh-issue-137716.ZcZSyi.rst new file mode 100644 index 00000000000000..82d8e1577905a7 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-13-16-58-58.gh-issue-137716.ZcZSyi.rst @@ -0,0 +1 @@ +Fix double period in :exc:`AttributeError` message for invalid mock assertions