Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 26 additions & 2 deletions Lib/html/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ def reset(self):
self.lasttag = '???'
self.interesting = interesting_normal
self.cdata_elem = None
self._support_cdata = True
self._escapable = True
super().reset()

Expand Down Expand Up @@ -183,6 +184,19 @@ def clear_cdata_mode(self):
self.cdata_elem = None
self._escapable = True

def _set_support_cdata(self, flag=True):
    """Enable or disable support of the CDATA sections.

    If enabled, "<![CDATA[" starts a CDATA section which ends with "]]>".
    If disabled, "<![CDATA[" starts a bogus comment which ends with ">".

    This method is not called by default. Its purpose is to be called
    in custom handle_starttag() and handle_endtag() methods, with a
    value that depends on the adjusted current node.
    See https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
    for details.
    """
    self._support_cdata = flag

# Internal -- handle data as far as reasonable. May leave state
# and data to be processed by a subsequent call. If 'end' is
# true, force handling all data as if followed by EOF marker.
Expand Down Expand Up @@ -258,7 +272,10 @@ def goahead(self, end):
break
self.handle_comment(rawdata[i+4:j])
elif startswith("<![CDATA[", i):
self.unknown_decl(rawdata[i+3:])
if self._support_cdata:
self.unknown_decl(rawdata[i+3:])
else:
self.handle_comment(rawdata[i+1:])
elif rawdata[i:i+9].lower() == '<!doctype':
self.handle_decl(rawdata[i+2:])
elif startswith("<!", i):
Expand Down Expand Up @@ -334,7 +351,14 @@ def parse_html_declaration(self, i):
# this case is actually already handled in goahead()
return self.parse_comment(i)
elif rawdata[i:i+9] == '<![CDATA[':
return self.parse_marked_section(i)
if self._support_cdata:
j = rawdata.find(']]>', i+9)
if j < 0:
return -1
self.unknown_decl(rawdata[i+3: j])
return j + 3
else:
return self.parse_bogus_comment(i)
elif rawdata[i:i+9].lower() == '<!doctype':
# find the closing >
gtpos = rawdata.find('>', i+9)
Expand Down
10 changes: 10 additions & 0 deletions Lib/test/pickletester.py
Original file line number Diff line number Diff line change
Expand Up @@ -1012,6 +1012,16 @@ def test_constants(self):
self.assertIs(self.loads(b'I01\n.'), True)
self.assertIs(self.loads(b'I00\n.'), False)

def test_issue135241(self):
# C implementation should check for hardcoded values 00 and 01
# when getting booleans from the INT opcode. Doing a str comparison
# to bypass truthy/falsy comparisons. These payloads should return
# 0, not False.
out1 = self.loads(b'I+0\n.')
self.assertEqual(str(out1), '0')
out2 = self.loads(b'I 0\n.')
self.assertEqual(str(out2), '0')

def test_zero_padded_integers(self):
self.assertEqual(self.loads(b'I010\n.'), 10)
self.assertEqual(self.loads(b'I-010\n.'), -10)
Expand Down
86 changes: 63 additions & 23 deletions Lib/test/test_htmlparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,13 @@

class EventCollector(html.parser.HTMLParser):

def __init__(self, *args, **kw):
def __init__(self, *args, autocdata=False, **kw):
self.autocdata = autocdata
self.events = []
self.append = self.events.append
html.parser.HTMLParser.__init__(self, *args, **kw)
if autocdata:
self._set_support_cdata(False)

def get_events(self):
# Normalize the list of events so that buffer artefacts don't
Expand All @@ -34,12 +37,16 @@ def get_events(self):

def handle_starttag(self, tag, attrs):
    # Record the event before touching the CDATA switch.
    event = ("starttag", tag, attrs)
    self.append(event)
    # In autocdata mode, an opening <svg> enables CDATA section support.
    if not (self.autocdata and tag == 'svg'):
        return
    self._set_support_cdata(True)

def handle_startendtag(self, tag, attrs):
    # Record the event as a ("startendtag", tag, attrs) tuple.
    self.append(("startendtag", tag, attrs))

def handle_endtag(self, tag):
    # Record the event before touching the CDATA switch.
    event = ("endtag", tag)
    self.append(event)
    # In autocdata mode, a closing </svg> disables CDATA section support.
    if not (self.autocdata and tag == 'svg'):
        return
    self._set_support_cdata(False)

# all other markup

Expand Down Expand Up @@ -767,10 +774,6 @@ def test_eof_in_declarations(self):
('<!', [('comment', '')]),
('<!-', [('comment', '-')]),
('<![', [('comment', '[')]),
('<![CDATA[', [('unknown decl', 'CDATA[')]),
('<![CDATA[x', [('unknown decl', 'CDATA[x')]),
('<![CDATA[x]', [('unknown decl', 'CDATA[x]')]),
('<![CDATA[x]]', [('unknown decl', 'CDATA[x]]')]),
('<!DOCTYPE', [('decl', 'DOCTYPE')]),
('<!DOCTYPE ', [('decl', 'DOCTYPE ')]),
('<!DOCTYPE html', [('decl', 'DOCTYPE html')]),
Expand All @@ -783,6 +786,18 @@ def test_eof_in_declarations(self):
for html, expected in data:
self._run_check(html, expected)

@support.subTests('content', ['', 'x', 'x]', 'x]]'])
def test_eof_in_cdata(self, content):
    # Input that opens "<![CDATA[" and reaches EOF without "]]>".
    source = '<![CDATA[' + content
    decl_event = ('unknown decl', 'CDATA[' + content)

    # Default collector: reported as an unknown declaration.
    self._run_check(source, [decl_event])

    # autocdata collector, no enclosing <svg>: reported as a comment.
    self._run_check(source,
                    [('comment', '![CDATA[' + content)],
                    collector=EventCollector(autocdata=True))

    # Default collector, inside <svg><text>: still an unknown declaration.
    self._run_check('<svg><text y="100">' + source,
                    [('starttag', 'svg', []),
                     ('starttag', 'text', [('y', '100')]),
                     decl_event])

def test_bogus_comments(self):
html = ('<!ELEMENT br EMPTY>'
'<! not really a comment >'
Expand Down Expand Up @@ -845,28 +860,53 @@ def test_broken_condcoms(self):
]
self._run_check(html, expected)

def test_cdata_declarations(self):
# More tests should be added. See also "8.2.4.42. Markup
# declaration open state", "8.2.4.69. CDATA section state",
# and issue 32876
html = ('<![CDATA[just some plain text]]>')
expected = [('unknown decl', 'CDATA[just some plain text')]
@support.subTests('content', [
    'just some plain text',
    '<!-- not a comment -->',
    '&not-an-entity-ref;',
    "<not a='start tag'>",
    '',
    '[[I have many brackets]]',
    'I have a > in the middle',
    'I have a ]] in the middle',
    '] ]>',
    ']] >',
    ('\n'
     ' if (a < b && a > b) {\n'
     ' printf("[<marquee>How?</marquee>]");\n'
     ' }\n'),
])
def test_cdata_section_content(self, content):
    # See "13.2.5.42 Markup declaration open state",
    # "13.2.5.69 CDATA section state", and issue bpo-32876.
    #
    # Whatever sits between "<![CDATA[" and "]]>" inside <svg><text>
    # must come back verbatim in a single 'unknown decl' event.
    markup = '<svg><text y="100"><![CDATA[' + content + ']]></text></svg>'
    events = [
        ('starttag', 'svg', []),
        ('starttag', 'text', [('y', '100')]),
        ('unknown decl', 'CDATA[' + content),
        ('endtag', 'text'),
        ('endtag', 'svg'),
    ]
    # Same expectation for the default collector and the autocdata one.
    self._run_check(markup, events)
    self._run_check(markup, events,
                    collector=EventCollector(autocdata=True))

def test_cdata_declarations_multiline(self):
html = ('<code><![CDATA['
' if (a < b && a > b) {'
' printf("[<marquee>How?</marquee>]");'
' }'
']]></code>')
def test_cdata_section(self):
# See "13.2.5.42 Markup declaration open state".
html = ('<![CDATA[foo<br>bar]]>'
'<svg><text y="100"><![CDATA[foo<br>bar]]></text></svg>'
'<![CDATA[foo<br>bar]]>')
expected = [
('starttag', 'code', []),
('unknown decl',
'CDATA[ if (a < b && a > b) { '
'printf("[<marquee>How?</marquee>]"); }'),
('endtag', 'code')
('comment', '[CDATA[foo<br'),
('data', 'bar]]>'),
('starttag', 'svg', []),
('starttag', 'text', [('y', '100')]),
('unknown decl', 'CDATA[foo<br>bar'),
('endtag', 'text'),
('endtag', 'svg'),
('comment', '[CDATA[foo<br'),
('data', 'bar]]>'),
]
self._run_check(html, expected)
self._run_check(html, expected, collector=EventCollector(autocdata=True))

def test_convert_charrefs_dropped_text(self):
# #23144: make sure that all the events are triggered when
Expand Down
62 changes: 62 additions & 0 deletions Lib/test/test_repl.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,68 @@ def foo(x):
]
self.assertEqual(traceback_lines, expected_lines)

def test_pythonstartup_error_reporting(self):
    # errors based on https://github.com/python/cpython/issues/137576

    def make_repl(env):
        # argv[0] is a path whose basename is '<stdin>'; the interpreter
        # that actually runs is given through executable=.
        return subprocess.Popen(
            [os.path.join(os.path.dirname(sys.executable), '<stdin>'), "-i"],
            executable=sys.executable,
            text=True,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            env=env,
        )

    def run_session(startup_source, user_input):
        # Write startup_source to a temporary PYTHONSTARTUP script, feed
        # user_input to a REPL started with it, and return the script
        # path together with the combined stdout/stderr of the session.
        with os_helper.temp_dir() as tmpdir:
            script = os.path.join(tmpdir, "pythonstartup.py")
            with open(script, "w") as f:
                f.write(startup_source)

            env = os.environ.copy()
            env['PYTHONSTARTUP'] = script
            env["PYTHON_HISTORY"] = os.path.join(tmpdir, ".pythonhist")
            p = make_repl(env)
            p.stdin.write(user_input)
            return script, kill_python(p)

    # case 1: error in user input, but PYTHONSTARTUP is fine
    # The file is opened in text mode, so a plain "\n" is the right line
    # terminator; os.linesep would be translated again on Windows.
    _, output = run_session("print('from pythonstartup')\n", "1/0")
    expected = dedent("""
        Traceback (most recent call last):
          File "<stdin>", line 1, in <module>
            1/0
            ~^~
        ZeroDivisionError: division by zero
    """)
    self.assertIn("from pythonstartup", output)
    self.assertIn(expected, output)

    # case 2: error in PYTHONSTARTUP triggered by user input
    script, output = run_session("def foo():\n    1/0\n", 'foo()')
    expected = dedent("""
        Traceback (most recent call last):
          File "<stdin>", line 1, in <module>
            foo()
            ~~~^^
          File "%s", line 2, in foo
            1/0
            ~^~
        ZeroDivisionError: division by zero
    """) % script
    self.assertIn(expected, output)



def test_runsource_show_syntax_error_location(self):
user_input = dedent("""def f(x, x): ...
""")
Expand Down
2 changes: 1 addition & 1 deletion Lib/traceback.py
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,7 @@ def format_frame_summary(self, frame_summary, **kwargs):
colorize = kwargs.get("colorize", False)
row = []
filename = frame_summary.filename
if frame_summary.filename.startswith("<stdin>-"):
if frame_summary.filename.startswith("<stdin-") and frame_summary.filename.endswith('>'):
filename = "<stdin>"
if colorize:
theme = _colorize.get_theme(force_color=True).traceback
Expand Down
2 changes: 1 addition & 1 deletion Lib/unittest/mock.py
Original file line number Diff line number Diff line change
Expand Up @@ -700,7 +700,7 @@ def __getattr__(self, name):
if name.startswith(('assert', 'assret', 'asert', 'aseert', 'assrt')) or name in _ATTRIB_DENY_LIST:
raise AttributeError(
f"{name!r} is not a valid assertion. Use a spec "
f"for the mock if {name!r} is meant to be an attribute.")
f"for the mock if {name!r} is meant to be an attribute")

with NonCallableMock._lock:
result = self._mock_children.get(name)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix for incorrect source code being shown in tracebacks from the Basic REPL
when :envvar:`PYTHONSTARTUP` is given. Patch by Adam Hartz.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix double period in :exc:`AttributeError` message for invalid mock assertions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
The :code:`INT` opcode of the C accelerator :mod:`!_pickle` module was updated
to look only for "00" and "01" to push booleans onto the stack, aligning with
the Python :mod:`pickle` module.
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Fix CDATA section parsing in :class:`html.parser.HTMLParser` according to
the HTML5 standard: ``] ]>`` and ``]] >`` no longer end the CDATA section.
Add private method ``_set_support_cdata()`` which can be used to specify
how to parse ``<![CDATA[`` --- as a CDATA section in foreign content
(SVG or MathML) or as a bogus comment in the HTML namespace.
2 changes: 1 addition & 1 deletion Modules/_pickle.c
Original file line number Diff line number Diff line change
Expand Up @@ -5255,7 +5255,7 @@ load_int(PickleState *state, UnpicklerObject *self)
}
}
else {
if (len == 3 && (x == 0 || x == 1)) {
if (len == 3 && s[0] == '0' && (s[1] == '0' || s[1] == '1')) {
if ((value = PyBool_FromLong(x)) == NULL)
return -1;
}
Expand Down
27 changes: 25 additions & 2 deletions Python/pythonrun.c
Original file line number Diff line number Diff line change
Expand Up @@ -1365,6 +1365,29 @@ run_eval_code_obj(PyThreadState *tstate, PyCodeObject *co, PyObject *globals, Py
return PyEval_EvalCode((PyObject*)co, globals, locals);
}

/* Build the filename used for one chunk of interactive source.
 *
 * If `filename` is wrapped in angle brackets ("<name>"), the counter is
 * inserted inside the brackets, giving "<name-N>".  Otherwise it is
 * appended, giving "name-N".
 *
 * Returns a new reference, or NULL if PyUnicode_Substring() or
 * PyUnicode_FromFormat() fails.
 */
static PyObject *
get_interactive_filename(PyObject *filename, Py_ssize_t count)
{
    Py_ssize_t len = PyUnicode_GET_LENGTH(filename);

    if (len >= 2
        && PyUnicode_ReadChar(filename, 0) == '<'
        && PyUnicode_ReadChar(filename, len - 1) == '>')
    {
        /* Strip the surrounding '<' and '>' so the counter ends up
           inside the brackets. */
        PyObject *middle = PyUnicode_Substring(filename, 1, len - 1);
        if (middle == NULL) {
            return NULL;
        }
        /* count is a Py_ssize_t, so it must be formatted with %zd;
           %d would read it as an int from the varargs. */
        PyObject *result = PyUnicode_FromFormat("<%U-%zd>", middle, count);
        Py_DECREF(middle);
        return result;
    }

    return PyUnicode_FromFormat("%U-%zd", filename, count);
}

static PyObject *
run_mod(mod_ty mod, PyObject *filename, PyObject *globals, PyObject *locals,
PyCompilerFlags *flags, PyArena *arena, PyObject* interactive_src,
Expand All @@ -1375,8 +1398,8 @@ run_mod(mod_ty mod, PyObject *filename, PyObject *globals, PyObject *locals,
if (interactive_src) {
PyInterpreterState *interp = tstate->interp;
if (generate_new_source) {
interactive_filename = PyUnicode_FromFormat(
"%U-%d", filename, interp->_interactive_src_count++);
interactive_filename = get_interactive_filename(
filename, interp->_interactive_src_count++);
} else {
Py_INCREF(interactive_filename);
}
Expand Down
Loading