diff --git a/Doc/library/ast.rst b/Doc/library/ast.rst index ea3ec7d95dc45d8..494621672171f2b 100644 --- a/Doc/library/ast.rst +++ b/Doc/library/ast.rst @@ -363,6 +363,11 @@ Literals function call). This has the same meaning as ``FormattedValue.value``. * ``str`` is a constant containing the text of the interpolation expression. + + If ``str`` is set to ``None``, then ``value`` is used to generate code + when calling :func:`ast.unparse`. This no longer guarantees that the + generated code is identical to the original and is intended for code + generation. * ``conversion`` is an integer: * -1: no conversion diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index a9543bdd13e83f8..cbca20cba5c2847 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -652,11 +652,11 @@ zlib Optimizations ============= -module_name ------------ - -* TODO +csv +--- +* :meth:`csv.Sniffer.sniff` delimiter detection is now up to 1.6x faster. + (Contributed by Maurycy Pawłowski-Wieroński in :gh:`137628`.) Removed diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index efe9fb3b6c7c6aa..94fcb1d8aee52b4 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -50,27 +50,58 @@ extern "C" { CPython refcounting operations on it! 
*/ +#define Py_INT_TAG 3 +#define Py_TAG_INVALID 2 +#define Py_TAG_REFCNT 1 +#define Py_TAG_BITS 3 -#if !defined(Py_GIL_DISABLED) && defined(Py_STACKREF_DEBUG) +#define Py_TAGGED_SHIFT 2 -#define Py_TAG_BITS 0 +#if !defined(Py_GIL_DISABLED) && defined(Py_STACKREF_DEBUG) PyAPI_FUNC(PyObject *) _Py_stackref_get_object(_PyStackRef ref); PyAPI_FUNC(PyObject *) _Py_stackref_close(_PyStackRef ref, const char *filename, int linenumber); -PyAPI_FUNC(_PyStackRef) _Py_stackref_create(PyObject *obj, const char *filename, int linenumber); +PyAPI_FUNC(_PyStackRef) _Py_stackref_create(PyObject *obj, uint16_t flags, const char *filename, int linenumber); PyAPI_FUNC(void) _Py_stackref_record_borrow(_PyStackRef ref, const char *filename, int linenumber); extern void _Py_stackref_associate(PyInterpreterState *interp, PyObject *obj, _PyStackRef ref); static const _PyStackRef PyStackRef_NULL = { .index = 0 }; -static const _PyStackRef PyStackRef_ERROR = { .index = 2 }; +static const _PyStackRef PyStackRef_ERROR = { .index = (1 << Py_TAGGED_SHIFT) }; -// Use the first 3 even numbers for None, True and False. 
-// Odd numbers are reserved for (tagged) integers -#define PyStackRef_None ((_PyStackRef){ .index = 4 } ) -#define PyStackRef_False ((_PyStackRef){ .index = 6 }) -#define PyStackRef_True ((_PyStackRef){ .index = 8 }) +#define PyStackRef_None ((_PyStackRef){ .index = (2 << Py_TAGGED_SHIFT) } ) +#define PyStackRef_False ((_PyStackRef){ .index = (3 << Py_TAGGED_SHIFT) }) +#define PyStackRef_True ((_PyStackRef){ .index = (4 << Py_TAGGED_SHIFT) }) + +#define INITIAL_STACKREF_INDEX (5 << Py_TAGGED_SHIFT) + +static inline _PyStackRef +PyStackRef_Wrap(void *ptr) +{ + assert(ptr != NULL); +#ifdef Py_DEBUG + assert(((uint64_t)ptr & Py_TAG_BITS) == 0); + return (_PyStackRef){ .index = ((uint64_t)ptr) | Py_TAG_INVALID }; +#else + return (_PyStackRef){ .index = (uint64_t)ptr }; +#endif +} + +static inline void * +PyStackRef_Unwrap(_PyStackRef ref) +{ +#ifdef Py_DEBUG + assert ((ref.index & Py_TAG_BITS) == Py_TAG_INVALID); + return (void *)(ref.index & ~Py_TAG_BITS); +#else + return (void *)(ref.index); +#endif +} -#define INITIAL_STACKREF_INDEX 10 +static inline int +PyStackRef_RefcountOnObject(_PyStackRef ref) +{ + return (ref.index & Py_TAG_REFCNT) == 0; +} static inline int PyStackRef_IsNull(_PyStackRef ref) @@ -81,7 +112,13 @@ PyStackRef_IsNull(_PyStackRef ref) static inline bool PyStackRef_IsError(_PyStackRef ref) { - return ref.index == 2; + return ref.index == (1 << Py_TAGGED_SHIFT); +} + +static inline bool +PyStackRef_IsMalformed(_PyStackRef ref) +{ + return (ref.index & Py_TAG_BITS) == Py_TAG_INVALID; } static inline bool @@ -112,7 +149,7 @@ PyStackRef_IsNone(_PyStackRef ref) static inline bool PyStackRef_IsTaggedInt(_PyStackRef ref) { - return (ref.index & 1) == 1; + return (ref.index & Py_TAG_BITS) == Py_INT_TAG; } static inline PyObject * @@ -123,50 +160,68 @@ _PyStackRef_AsPyObjectBorrow(_PyStackRef ref, const char *filename, int linenumb _Py_stackref_record_borrow(ref, filename, linenumber); return _Py_stackref_get_object(ref); } - #define 
PyStackRef_AsPyObjectBorrow(REF) _PyStackRef_AsPyObjectBorrow((REF), __FILE__, __LINE__) static inline PyObject * _PyStackRef_AsPyObjectSteal(_PyStackRef ref, const char *filename, int linenumber) { - return _Py_stackref_close(ref, filename, linenumber); + PyObject *obj = _Py_stackref_close(ref, filename, linenumber); + if (PyStackRef_RefcountOnObject(ref)) { + return obj; + } + return Py_NewRef(obj); } #define PyStackRef_AsPyObjectSteal(REF) _PyStackRef_AsPyObjectSteal((REF), __FILE__, __LINE__) static inline _PyStackRef _PyStackRef_FromPyObjectNew(PyObject *obj, const char *filename, int linenumber) { - Py_INCREF(obj); - return _Py_stackref_create(obj, filename, linenumber); + assert(obj != NULL); + uint16_t flags = 0; + if (!_Py_IsImmortal(obj)) { + _Py_INCREF_MORTAL(obj); + } else { + flags = Py_TAG_REFCNT; + } + return _Py_stackref_create(obj, flags, filename, linenumber); } #define PyStackRef_FromPyObjectNew(obj) _PyStackRef_FromPyObjectNew(_PyObject_CAST(obj), __FILE__, __LINE__) static inline _PyStackRef _PyStackRef_FromPyObjectSteal(PyObject *obj, const char *filename, int linenumber) { - return _Py_stackref_create(obj, filename, linenumber); + assert(obj != NULL); + uint16_t flags = 0; + if (_Py_IsImmortal(obj)) { + flags = Py_TAG_REFCNT; + } + return _Py_stackref_create(obj, flags, filename, linenumber); } #define PyStackRef_FromPyObjectSteal(obj) _PyStackRef_FromPyObjectSteal(_PyObject_CAST(obj), __FILE__, __LINE__) static inline _PyStackRef _PyStackRef_FromPyObjectBorrow(PyObject *obj, const char *filename, int linenumber) { - return _Py_stackref_create(obj, filename, linenumber); + return _Py_stackref_create(obj, Py_TAG_REFCNT, filename, linenumber); } #define PyStackRef_FromPyObjectBorrow(obj) _PyStackRef_FromPyObjectBorrow(_PyObject_CAST(obj), __FILE__, __LINE__) static inline void _PyStackRef_CLOSE(_PyStackRef ref, const char *filename, int linenumber) { + assert(!PyStackRef_IsError(ref)); + assert(!PyStackRef_IsNull(ref)); if 
(PyStackRef_IsTaggedInt(ref)) { return; } PyObject *obj = _Py_stackref_close(ref, filename, linenumber); - Py_DECREF(obj); + assert(Py_REFCNT(obj) > 0); + if (PyStackRef_RefcountOnObject(ref)) { + Py_DECREF(obj); + } } #define PyStackRef_CLOSE(REF) _PyStackRef_CLOSE((REF), __FILE__, __LINE__) - static inline void _PyStackRef_XCLOSE(_PyStackRef ref, const char *filename, int linenumber) { @@ -182,31 +237,46 @@ static inline _PyStackRef _PyStackRef_DUP(_PyStackRef ref, const char *filename, int linenumber) { assert(!PyStackRef_IsError(ref)); + assert(!PyStackRef_IsNull(ref)); if (PyStackRef_IsTaggedInt(ref)) { return ref; } - else { - PyObject *obj = _Py_stackref_get_object(ref); + PyObject *obj = _Py_stackref_get_object(ref); + uint16_t flags = 0; + if (PyStackRef_RefcountOnObject(ref)) { Py_INCREF(obj); - return _Py_stackref_create(obj, filename, linenumber); + } else { + flags = Py_TAG_REFCNT; } + return _Py_stackref_create(obj, flags, filename, linenumber); } #define PyStackRef_DUP(REF) _PyStackRef_DUP(REF, __FILE__, __LINE__) -extern void _PyStackRef_CLOSE_SPECIALIZED(_PyStackRef ref, destructor destruct, const char *filename, int linenumber); -#define PyStackRef_CLOSE_SPECIALIZED(REF, DESTRUCT) _PyStackRef_CLOSE_SPECIALIZED(REF, DESTRUCT, __FILE__, __LINE__) - -static inline _PyStackRef -PyStackRef_MakeHeapSafe(_PyStackRef ref) +static inline void +_PyStackRef_CLOSE_SPECIALIZED(_PyStackRef ref, destructor destruct, const char *filename, int linenumber) { - return ref; + assert(!PyStackRef_IsError(ref)); + assert(!PyStackRef_IsNull(ref)); + assert(!PyStackRef_IsTaggedInt(ref)); + PyObject *obj = _Py_stackref_close(ref, filename, linenumber); + if (PyStackRef_RefcountOnObject(ref)) { + _Py_DECREF_SPECIALIZED(obj, destruct); + } } +#define PyStackRef_CLOSE_SPECIALIZED(REF, DESTRUCT) _PyStackRef_CLOSE_SPECIALIZED(REF, DESTRUCT, __FILE__, __LINE__) static inline _PyStackRef -PyStackRef_Borrow(_PyStackRef ref) +_PyStackRef_Borrow(_PyStackRef ref, const char 
*filename, int linenumber) { - return PyStackRef_DUP(ref); + assert(!PyStackRef_IsError(ref)); + assert(!PyStackRef_IsNull(ref)); + if (PyStackRef_IsTaggedInt(ref)) { + return ref; + } + PyObject *obj = _Py_stackref_get_object(ref); + return _Py_stackref_create(obj, Py_TAG_REFCNT, filename, linenumber); } +#define PyStackRef_Borrow(REF) _PyStackRef_Borrow((REF), __FILE__, __LINE__) #define PyStackRef_CLEAR(REF) \ do { \ @@ -219,28 +289,47 @@ PyStackRef_Borrow(_PyStackRef ref) static inline _PyStackRef _PyStackRef_FromPyObjectStealMortal(PyObject *obj, const char *filename, int linenumber) { + assert(obj != NULL); assert(!_Py_IsImmortal(obj)); - return _Py_stackref_create(obj, filename, linenumber); + return _Py_stackref_create(obj, 0, filename, linenumber); } #define PyStackRef_FromPyObjectStealMortal(obj) _PyStackRef_FromPyObjectStealMortal(_PyObject_CAST(obj), __FILE__, __LINE__) static inline bool PyStackRef_IsHeapSafe(_PyStackRef ref) { - return true; + if ((ref.index & Py_TAG_BITS) != Py_TAG_REFCNT || PyStackRef_IsNull(ref)) { + // Tagged ints and ERROR are included. 
+ return true; + } + + PyObject *obj = _Py_stackref_get_object(ref); + return _Py_IsImmortal(obj); } +static inline _PyStackRef +_PyStackRef_MakeHeapSafe(_PyStackRef ref, const char *filename, int linenumber) +{ + if (PyStackRef_IsHeapSafe(ref)) { + return ref; + } + + PyObject *obj = _Py_stackref_close(ref, filename, linenumber); + Py_INCREF(obj); + return _Py_stackref_create(obj, 0, filename, linenumber); +} +#define PyStackRef_MakeHeapSafe(REF) _PyStackRef_MakeHeapSafe(REF, __FILE__, __LINE__) + static inline _PyStackRef _PyStackRef_FromPyObjectNewMortal(PyObject *obj, const char *filename, int linenumber) { + assert(obj != NULL); assert(!_Py_IsStaticImmortal(obj)); Py_INCREF(obj); - return _Py_stackref_create(obj, filename, linenumber); + return _Py_stackref_create(obj, 0, filename, linenumber); } #define PyStackRef_FromPyObjectNewMortal(obj) _PyStackRef_FromPyObjectNewMortal(_PyObject_CAST(obj), __FILE__, __LINE__) -#define PyStackRef_RefcountOnObject(REF) 1 - extern int PyStackRef_Is(_PyStackRef a, _PyStackRef b); extern bool PyStackRef_IsTaggedInt(_PyStackRef ref); @@ -257,11 +346,6 @@ PyStackRef_IsNullOrInt(_PyStackRef ref); #else -#define Py_INT_TAG 3 -#define Py_TAG_INVALID 2 -#define Py_TAG_REFCNT 1 -#define Py_TAG_BITS 3 - static const _PyStackRef PyStackRef_ERROR = { .bits = Py_TAG_INVALID }; /* Wrap a pointer in a stack ref. 
@@ -273,6 +357,7 @@ PyStackRef_Wrap(void *ptr) { assert(ptr != NULL); #ifdef Py_DEBUG + assert(((uintptr_t)ptr & Py_TAG_BITS) == 0); return (_PyStackRef){ .bits = ((uintptr_t)ptr) | Py_TAG_INVALID }; #else return (_PyStackRef){ .bits = (uintptr_t)ptr }; @@ -318,8 +403,8 @@ PyStackRef_IsTaggedInt(_PyStackRef i) static inline _PyStackRef PyStackRef_TagInt(intptr_t i) { - assert(Py_ARITHMETIC_RIGHT_SHIFT(intptr_t, (i << 2), 2) == i); - return (_PyStackRef){ .bits = ((((uintptr_t)i) << 2) | Py_INT_TAG) }; + assert(Py_ARITHMETIC_RIGHT_SHIFT(intptr_t, (i << Py_TAGGED_SHIFT), Py_TAGGED_SHIFT) == i); + return (_PyStackRef){ .bits = ((((uintptr_t)i) << Py_TAGGED_SHIFT) | Py_INT_TAG) }; } static inline intptr_t @@ -327,7 +412,7 @@ PyStackRef_UntagInt(_PyStackRef i) { assert(PyStackRef_IsTaggedInt(i)); intptr_t val = (intptr_t)i.bits; - return Py_ARITHMETIC_RIGHT_SHIFT(intptr_t, val, 2); + return Py_ARITHMETIC_RIGHT_SHIFT(intptr_t, val, Py_TAGGED_SHIFT); } @@ -335,8 +420,8 @@ static inline _PyStackRef PyStackRef_IncrementTaggedIntNoOverflow(_PyStackRef ref) { assert((ref.bits & Py_TAG_BITS) == Py_INT_TAG); // Is tagged int - assert((ref.bits & (~Py_TAG_BITS)) != (INT_MAX & (~Py_TAG_BITS))); // Isn't about to overflow - return (_PyStackRef){ .bits = ref.bits + 4 }; + assert((ref.bits & (~Py_TAG_BITS)) != (INTPTR_MAX & (~Py_TAG_BITS))); // Isn't about to overflow + return (_PyStackRef){ .bits = ref.bits + (1 << Py_TAGGED_SHIFT) }; } #define PyStackRef_IsDeferredOrTaggedInt(ref) (((ref).bits & Py_TAG_REFCNT) != 0) diff --git a/Lib/_ast_unparse.py b/Lib/_ast_unparse.py index 16cf56f62cc1e58..1c8741b5a554833 100644 --- a/Lib/_ast_unparse.py +++ b/Lib/_ast_unparse.py @@ -658,9 +658,9 @@ def _unparse_interpolation_value(self, inner): unparser.set_precedence(_Precedence.TEST.next(), inner) return unparser.visit(inner) - def _write_interpolation(self, node, is_interpolation=False): + def _write_interpolation(self, node, use_str_attr=False): with self.delimit("{", "}"): - if 
is_interpolation: + if use_str_attr: expr = node.str else: expr = self._unparse_interpolation_value(node.value) @@ -678,7 +678,8 @@ def visit_FormattedValue(self, node): self._write_interpolation(node) def visit_Interpolation(self, node): - self._write_interpolation(node, is_interpolation=True) + # If `str` is set to `None`, use the `value` to generate the source code. + self._write_interpolation(node, use_str_attr=node.str is not None) def visit_Name(self, node): self.write(node.id) diff --git a/Lib/csv.py b/Lib/csv.py index 98eab01429a8ec1..b2aaf5fd9fa91e6 100644 --- a/Lib/csv.py +++ b/Lib/csv.py @@ -362,31 +362,33 @@ def _guess_delimiter(self, data, delimiters): try and evaluate the smallest portion of the data possible, evaluating additional chunks as necessary. """ + from collections import Counter, defaultdict data = list(filter(None, data.split('\n'))) - ascii = [chr(c) for c in range(127)] # 7-bit ASCII - # build frequency tables chunkLength = min(10, len(data)) iteration = 0 - charFrequency = {} + num_lines = 0 + # {char -> {count_per_line -> num_lines_with_that_count}} + char_frequency = defaultdict(Counter) modes = {} delims = {} start, end = 0, chunkLength while start < len(data): iteration += 1 for line in data[start:end]: - for char in ascii: - metaFrequency = charFrequency.get(char, {}) - # must count even if frequency is 0 - freq = line.count(char) - # value is the mode - metaFrequency[freq] = metaFrequency.get(freq, 0) + 1 - charFrequency[char] = metaFrequency - - for char in charFrequency.keys(): - items = list(charFrequency[char].items()) + num_lines += 1 + for char, count in Counter(line).items(): + if char.isascii(): + char_frequency[char][count] += 1 + + for char, counts in char_frequency.items(): + items = list(counts.items()) + missed_lines = num_lines - sum(counts.values()) + if missed_lines: + # Store the number of lines 'char' was missing from. 
+ items.append((0, missed_lines)) if len(items) == 1 and items[0][0] == 0: continue # get the mode of the frequencies diff --git a/Lib/test/test_ast/test_ast.py b/Lib/test/test_ast/test_ast.py index 1e6f60074308e23..5fdb3a458ae9996 100644 --- a/Lib/test/test_ast/test_ast.py +++ b/Lib/test/test_ast/test_ast.py @@ -3308,6 +3308,15 @@ class MoreFieldsThanTypes(ast.AST): self.assertEqual(obj.a, 1) self.assertEqual(obj.b, 2) + def test_malformed_fields_with_bytes(self): + class BadFields(ast.AST): + _fields = (b'\xff'*64,) + _field_types = {'a': int} + + # This should not crash + with self.assertWarnsRegex(DeprecationWarning, r"Field b'\\xff\\xff.*' .*"): + obj = BadFields() + def test_complete_field_types(self): class _AllFieldTypes(ast.AST): _fields = ('a', 'b') diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index 50431b562f90baf..6be6a7ae222f02c 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -1437,6 +1437,56 @@ def test_doublequote(self): dialect = sniffer.sniff(self.sample9) self.assertTrue(dialect.doublequote) + def test_guess_delimiter_crlf_not_chosen(self): + # Ensure that we pick the real delimiter ("|") over "\r" in a tie. + sniffer = csv.Sniffer() + sample = "a|b\r\nc|d\r\ne|f\r\n" + self.assertEqual(sniffer.sniff(sample).delimiter, "|") + self.assertNotEqual(sniffer.sniff(sample).delimiter, "\r") + + def test_zero_mode_tie_order_independence(self): + sniffer = csv.Sniffer() + # ":" appears in half the rows (1, 0, 1, 0) - a tie between + # 0 and 1 per line. + # "," appears once every row (true delimiter). + # + # Even if the zero-frequency bucket is appended vs. inserted, the tie + # yields an adjusted score of 0, so ":" should not be promoted and + # "," must be selected. 
 + sample = ( + "a,b:c\n" + "d,e\n" + "f,g:c\n" + "h,i\n" + ) + dialect = sniffer.sniff(sample) + self.assertEqual(dialect.delimiter, ",") + + def test_zero_mode_tie_order_comma_first(self): + sniffer = csv.Sniffer() + pattern = ( + "a,b\n" + "c:d\n" + "e,f\n" + "g:h\n" + ) + sample = pattern * 10 + with self.assertRaisesRegex(csv.Error, "Could not determine delimiter"): + sniffer.sniff(sample) + + def test_zero_mode_tie_order_colon_first(self): + sniffer = csv.Sniffer() + pattern = ( + "a:b\n" + "c,d\n" + "e:f\n" + "g,h\n" + ) + sample = pattern * 10 + with self.assertRaisesRegex(csv.Error, "Could not determine delimiter"): + sniffer.sniff(sample) + + class NUL: def write(s, *args): pass diff --git a/Lib/test/test_repl.py b/Lib/test/test_repl.py index 54e69277282c301..042aa84b35dcf8c 100644 --- a/Lib/test/test_repl.py +++ b/Lib/test/test_repl.py @@ -5,6 +5,7 @@ import subprocess import sys import unittest +from functools import partial from textwrap import dedent from test import support from test.support import ( @@ -27,7 +28,7 @@ raise unittest.SkipTest("test module requires subprocess") -def spawn_repl(*args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, **kw): +def spawn_repl(*args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, custom=False, **kw): """Run the Python REPL with the given arguments. kw is extra keyword args to pass to subprocess.Popen. Returns a Popen @@ -41,7 +42,11 @@ def spawn_repl(*args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, **kw): # path may be used by PyConfig_Get("module_search_paths") to build the # default module search path. stdin_fname = os.path.join(os.path.dirname(sys.executable), "<stdin>") - cmd_line = [stdin_fname, '-I', '-i'] + cmd_line = [stdin_fname, '-I'] + # Don't re-run the built-in REPL from interactive mode + # if we're testing a custom REPL (such as the asyncio REPL). 
+ if not custom: + cmd_line.append('-i') cmd_line.extend(args) # Set TERM=vt100, for the rationale see the comments in spawn_python() of @@ -55,6 +60,10 @@ def spawn_repl(*args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, **kw): stdout=stdout, stderr=stderr, **kw) + +spawn_asyncio_repl = partial(spawn_repl, "-m", "asyncio", custom=True) + + def run_on_interactive_mode(source): """Spawn a new Python interpreter, pass the given input source code from the stdin and return the @@ -359,7 +368,7 @@ def f(): class TestAsyncioREPL(unittest.TestCase): def test_multiple_statements_fail_early(self): user_input = "1 / 0; print(f'afterwards: {1+1}')" - p = spawn_repl("-m", "asyncio") + p = spawn_asyncio_repl() p.stdin.write(user_input) output = kill_python(p) self.assertIn("ZeroDivisionError", output) @@ -371,7 +380,7 @@ def test_toplevel_contextvars_sync(self): var = ContextVar("var", default="failed") var.set("ok") """) - p = spawn_repl("-m", "asyncio") + p = spawn_asyncio_repl() p.stdin.write(user_input) user_input2 = dedent(""" print(f"toplevel contextvar test: {var.get()}") @@ -387,7 +396,7 @@ def test_toplevel_contextvars_async(self): from contextvars import ContextVar var = ContextVar('var', default='failed') """) - p = spawn_repl("-m", "asyncio") + p = spawn_asyncio_repl() p.stdin.write(user_input) user_input2 = "async def set_var(): var.set('ok')\n" p.stdin.write(user_input2) diff --git a/Lib/test/test_unparse.py b/Lib/test/test_unparse.py index 0d6b05bc660b76a..35e4652a87b423c 100644 --- a/Lib/test/test_unparse.py +++ b/Lib/test/test_unparse.py @@ -206,6 +206,97 @@ def test_tstrings(self): self.check_ast_roundtrip("t'foo'") self.check_ast_roundtrip("t'foo {bar}'") self.check_ast_roundtrip("t'foo {bar!s:.2f}'") + self.check_ast_roundtrip("t'{a + b}'") + self.check_ast_roundtrip("t'{a + b:x}'") + self.check_ast_roundtrip("t'{a + b!s}'") + self.check_ast_roundtrip("t'{ {a}}'") + self.check_ast_roundtrip("t'{ {a}=}'") + self.check_ast_roundtrip("t'{{a}}'") + 
self.check_ast_roundtrip("t''") + self.check_ast_roundtrip('t""') + self.check_ast_roundtrip("t'{(lambda x: x)}'") + self.check_ast_roundtrip("t'{t'{x}'}'") + + def test_tstring_with_nonsensical_str_field(self): + # `value` suggests that the original code is `t'{test1}`, but `str` suggests otherwise + self.assertEqual( + ast.unparse( + ast.TemplateStr( + values=[ + ast.Interpolation( + value=ast.Name(id="test1", ctx=ast.Load()), str="test2", conversion=-1 + ) + ] + ) + ), + "t'{test2}'", + ) + + def test_tstring_with_none_str_field(self): + self.assertEqual( + ast.unparse( + ast.TemplateStr( + [ast.Interpolation(value=ast.Name(id="test1"), str=None, conversion=-1)] + ) + ), + "t'{test1}'", + ) + self.assertEqual( + ast.unparse( + ast.TemplateStr( + [ + ast.Interpolation( + value=ast.Lambda( + args=ast.arguments(args=[ast.arg(arg="x")]), + body=ast.Name(id="x"), + ), + str=None, + conversion=-1, + ) + ] + ) + ), + "t'{(lambda x: x)}'", + ) + self.assertEqual( + ast.unparse( + ast.TemplateStr( + values=[ + ast.Interpolation( + value=ast.TemplateStr( + # `str` field kept here + [ast.Interpolation(value=ast.Name(id="x"), str="y", conversion=-1)] + ), + str=None, + conversion=-1, + ) + ] + ) + ), + '''t"{t'{y}'}"''', + ) + self.assertEqual( + ast.unparse( + ast.TemplateStr( + values=[ + ast.Interpolation( + value=ast.TemplateStr( + [ast.Interpolation(value=ast.Name(id="x"), str=None, conversion=-1)] + ), + str=None, + conversion=-1, + ) + ] + ) + ), + '''t"{t'{x}'}"''', + ) + self.assertEqual( + ast.unparse(ast.TemplateStr( + [ast.Interpolation(value=ast.Constant(value="foo"), str=None, conversion=114)] + )), + '''t"{'foo'!r}"''', + ) def test_strings(self): self.check_ast_roundtrip("u'foo'") @@ -813,15 +904,6 @@ def test_type_params(self): self.check_ast_roundtrip("def f[T: int = int, **P = int, *Ts = *int]():\n pass") self.check_ast_roundtrip("class C[T: int = int, **P = int, *Ts = *int]():\n pass") - def test_tstr(self): - self.check_ast_roundtrip("t'{a + b}'") - 
self.check_ast_roundtrip("t'{a + b:x}'") - self.check_ast_roundtrip("t'{a + b!s}'") - self.check_ast_roundtrip("t'{ {a}}'") - self.check_ast_roundtrip("t'{ {a}=}'") - self.check_ast_roundtrip("t'{{a}}'") - self.check_ast_roundtrip("t''") - class ManualASTCreationTestCase(unittest.TestCase): """Test that AST nodes created without a type_params field unparse correctly.""" diff --git a/Misc/ACKS b/Misc/ACKS index 6876380e0ba8d20..f5f15f2eb7ea243 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1921,6 +1921,7 @@ Tim Tisdall Jason Tishler Christian Tismer Jim Tittsler +Abhishek Tiwari Frank J. Tobin James Tocknell Bennett Todd diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-10-03-17-51-43.gh-issue-139475._684ED.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-03-17-51-43.gh-issue-139475._684ED.rst new file mode 100644 index 000000000000000..f4d50b7d0207a06 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-03-17-51-43.gh-issue-139475._684ED.rst @@ -0,0 +1,2 @@ +Changes in stackref debugging mode when ``Py_STACKREF_DEBUG`` is set. We use +the same pattern of refcounting for stackrefs as in production build. diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-10-22-11-30-16.gh-issue-135904.3WE5oW.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-22-11-30-16.gh-issue-135904.3WE5oW.rst new file mode 100644 index 000000000000000..b52a57dba4acaea --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-22-11-30-16.gh-issue-135904.3WE5oW.rst @@ -0,0 +1,3 @@ +Add special labels to the assembly created during stencil creation to +support relocations that the native object file format does not support. +Specifically, 19 bit branches for AArch64 in Mach-O object files. 
 diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-10-22-17-22-22.gh-issue-140431.m8D_A-.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-22-17-22-22.gh-issue-140431.m8D_A-.rst new file mode 100644 index 000000000000000..3d62d210f1f0071 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-22-17-22-22.gh-issue-140431.m8D_A-.rst @@ -0,0 +1,3 @@ +Fix a crash in Python's :term:`garbage collector <garbage collection>` due to +partially initialized :term:`coroutine` objects when coroutine origin tracking +depth is enabled (:func:`sys.set_coroutine_origin_tracking_depth`). diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-10-22-23-26-37.gh-issue-140443.wT5i1A.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-22-23-26-37.gh-issue-140443.wT5i1A.rst new file mode 100644 index 000000000000000..a1fff8fef7ebe24 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-22-23-26-37.gh-issue-140443.wT5i1A.rst @@ -0,0 +1,5 @@ +The logarithm functions (such as :func:`math.log10` and :func:`math.log`) may now produce +slightly different results for extremely large integers that cannot be +converted to floats without overflow. These results are generally more +accurate, with reduced worst-case error and a tighter overall error +distribution. diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-10-23-16-05-50.gh-issue-140471.Ax_aXn.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-23-16-05-50.gh-issue-140471.Ax_aXn.rst new file mode 100644 index 000000000000000..afa9326fff3aeea --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-23-16-05-50.gh-issue-140471.Ax_aXn.rst @@ -0,0 +1,2 @@ +Fix potential buffer overflow in :class:`ast.AST` node initialization when +encountering malformed :attr:`~ast.AST._fields` containing non-:class:`str`. 
diff --git a/Misc/NEWS.d/next/Library/2025-08-11-04-52-18.gh-issue-137627.Ku5Yi2.rst b/Misc/NEWS.d/next/Library/2025-08-11-04-52-18.gh-issue-137627.Ku5Yi2.rst new file mode 100644 index 000000000000000..855070ed6f4511f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-08-11-04-52-18.gh-issue-137627.Ku5Yi2.rst @@ -0,0 +1 @@ +Speed up :meth:`csv.Sniffer.sniff` delimiter detection by up to 1.6x. diff --git a/Misc/NEWS.d/next/Library/2025-10-23-12-12-22.gh-issue-138774.mnh2gU.rst b/Misc/NEWS.d/next/Library/2025-10-23-12-12-22.gh-issue-138774.mnh2gU.rst new file mode 100644 index 000000000000000..e12f789e6744543 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-10-23-12-12-22.gh-issue-138774.mnh2gU.rst @@ -0,0 +1,2 @@ +:func:`ast.unparse` now generates full source code when handling +:class:`ast.Interpolation` nodes that do not have a specified source. diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index c631beb9ce5477f..be88841716b0041 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2309,7 +2309,7 @@ loghelper(PyObject* arg, double (*func)(double)) assert(e >= 0); assert(!PyErr_Occurred()); /* Value is ~= x * 2**e, so the log ~= log(x) + log(2) * e. */ - result = func(x) + func(2.0) * e; + result = fma(func(2.0), (double)e, func(x)); } else /* Successfully converted x to a double. 
*/ diff --git a/Objects/genobject.c b/Objects/genobject.c index c9ca2f1de51ddc9..2371ad16d5c1a66 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -932,6 +932,7 @@ make_gen(PyTypeObject *type, PyFunctionObject *func) gen->gi_weakreflist = NULL; gen->gi_exc_state.exc_value = NULL; gen->gi_exc_state.previous_item = NULL; + gen->gi_iframe.f_executable = PyStackRef_None; assert(func->func_name != NULL); gen->gi_name = Py_NewRef(func->func_name); assert(func->func_qualname != NULL); diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py index dba20226c3283ab..3e252cbc4883d19 100755 --- a/Parser/asdl_c.py +++ b/Parser/asdl_c.py @@ -1009,7 +1009,7 @@ def visitModule(self, mod): else { if (PyErr_WarnFormat( PyExc_DeprecationWarning, 1, - "Field '%U' is missing from %.400s._field_types. " + "Field %R is missing from %.400s._field_types. " "This will become an error in Python 3.15.", name, Py_TYPE(self)->tp_name ) < 0) { @@ -1044,7 +1044,7 @@ def visitModule(self, mod): // simple field (e.g., identifier) if (PyErr_WarnFormat( PyExc_DeprecationWarning, 1, - "%.400s.__init__ missing 1 required positional argument: '%U'. " + "%.400s.__init__ missing 1 required positional argument: %R. " "This will become an error in Python 3.15.", Py_TYPE(self)->tp_name, name ) < 0) { diff --git a/Python/Python-ast.c b/Python/Python-ast.c index 660bc598a4862c7..aac24ed7d3c0c5b 100644 --- a/Python/Python-ast.c +++ b/Python/Python-ast.c @@ -5293,7 +5293,7 @@ ast_type_init(PyObject *self, PyObject *args, PyObject *kw) else { if (PyErr_WarnFormat( PyExc_DeprecationWarning, 1, - "Field '%U' is missing from %.400s._field_types. " + "Field %R is missing from %.400s._field_types. " "This will become an error in Python 3.15.", name, Py_TYPE(self)->tp_name ) < 0) { @@ -5328,7 +5328,7 @@ ast_type_init(PyObject *self, PyObject *args, PyObject *kw) // simple field (e.g., identifier) if (PyErr_WarnFormat( PyExc_DeprecationWarning, 1, - "%.400s.__init__ missing 1 required positional argument: '%U'. 
" + "%.400s.__init__ missing 1 required positional argument: %R. " "This will become an error in Python 3.15.", Py_TYPE(self)->tp_name, name ) < 0) { diff --git a/Python/stackrefs.c b/Python/stackrefs.c index ecc0012ef17b393..720916e0854f5cf 100644 --- a/Python/stackrefs.c +++ b/Python/stackrefs.c @@ -109,18 +109,19 @@ _Py_stackref_close(_PyStackRef ref, const char *filename, int linenumber) } _PyStackRef -_Py_stackref_create(PyObject *obj, const char *filename, int linenumber) +_Py_stackref_create(PyObject *obj, uint16_t flags, const char *filename, int linenumber) { if (obj == NULL) { Py_FatalError("Cannot create a stackref for NULL"); } PyInterpreterState *interp = PyInterpreterState_Get(); uint64_t new_id = interp->next_stackref; - interp->next_stackref = new_id + 2; + interp->next_stackref = new_id + (1 << Py_TAGGED_SHIFT); TableEntry *entry = make_table_entry(obj, filename, linenumber); if (entry == NULL) { Py_FatalError("No memory left for stackref debug table"); } + new_id |= flags; if (_Py_hashtable_set(interp->open_stackrefs_table, (void *)new_id, entry) < 0) { Py_FatalError("No memory left for stackref debug table"); } @@ -194,16 +195,10 @@ _Py_stackref_report_leaks(PyInterpreterState *interp) } } -void -_PyStackRef_CLOSE_SPECIALIZED(_PyStackRef ref, destructor destruct, const char *filename, int linenumber) -{ - PyObject *obj = _Py_stackref_close(ref, filename, linenumber); - _Py_DECREF_SPECIALIZED(obj, destruct); -} - _PyStackRef PyStackRef_TagInt(intptr_t i) { - return (_PyStackRef){ .index = (i << 1) + 1 }; + assert(Py_ARITHMETIC_RIGHT_SHIFT(intptr_t, (i << Py_TAGGED_SHIFT), Py_TAGGED_SHIFT) == i); + return (_PyStackRef){ .index = (i << Py_TAGGED_SHIFT) | Py_INT_TAG }; } intptr_t @@ -211,7 +206,7 @@ PyStackRef_UntagInt(_PyStackRef i) { assert(PyStackRef_IsTaggedInt(i)); intptr_t val = (intptr_t)i.index; - return Py_ARITHMETIC_RIGHT_SHIFT(intptr_t, val, 1); + return Py_ARITHMETIC_RIGHT_SHIFT(intptr_t, val, Py_TAGGED_SHIFT); } bool @@ -223,8 +218,9 @@ 
PyStackRef_IsNullOrInt(_PyStackRef ref) _PyStackRef PyStackRef_IncrementTaggedIntNoOverflow(_PyStackRef ref) { - assert(ref.index <= INT_MAX - 2); // No overflow - return (_PyStackRef){ .index = ref.index + 2 }; + assert(PyStackRef_IsTaggedInt(ref)); + assert((ref.index & (~Py_TAG_BITS)) != (INTPTR_MAX & (~Py_TAG_BITS))); // Isn't about to overflow + return (_PyStackRef){ .index = ref.index + (1 << Py_TAGGED_SHIFT) }; } diff --git a/Tools/jit/_optimizers.py b/Tools/jit/_optimizers.py index 866417398b0ba53..0adc550ba5e84cc 100644 --- a/Tools/jit/_optimizers.py +++ b/Tools/jit/_optimizers.py @@ -9,7 +9,7 @@ _RE_NEVER_MATCH = re.compile(r"(?!)") # Dictionary mapping branch instructions to their inverted branch instructions. # If a branch cannot be inverted, the value is None: -_X86_BRANCHES = { +_X86_BRANCH_NAMES = { # https://www.felixcloutier.com/x86/jcc "ja": "jna", "jae": "jnae", @@ -37,7 +37,11 @@ "loopz": None, } # Update with all of the inverted branches, too: -_X86_BRANCHES |= {v: k for k, v in _X86_BRANCHES.items() if v} +_X86_BRANCH_NAMES |= {v: k for k, v in _X86_BRANCH_NAMES.items() if v} +# No custom relocations needed +_X86_BRANCHES: dict[str, tuple[str | None, str | None]] = { + k: (v, None) for k, v in _X86_BRANCH_NAMES.items() +} _AARCH64_COND_CODES = { # https://developer.arm.com/documentation/dui0801/b/CJAJIHAD?lang=en @@ -58,12 +62,15 @@ "hi": "ls", "ls": "hi", } +# MyPy doesn't understand that a invariant variable can be initialized by a covariant value +CUSTOM_AARCH64_BRANCH19: str | None = "CUSTOM_AARCH64_BRANCH19" + # Branches are either b.{cond} or bc.{cond} -_AARCH64_BRANCHES = { - "b." + cond: ("b." + inverse if inverse else None) +_AARCH64_BRANCHES: dict[str, tuple[str | None, str | None]] = { + "b." + cond: (("b." + inverse if inverse else None), CUSTOM_AARCH64_BRANCH19) for (cond, inverse) in _AARCH64_COND_CODES.items() } | { - "bc." + cond: ("bc." + inverse if inverse else None) + "bc." + cond: (("bc." 
+ inverse if inverse else None), CUSTOM_AARCH64_BRANCH19) for (cond, inverse) in _AARCH64_COND_CODES.items() } @@ -113,7 +120,8 @@ class Optimizer: r'\s*(?P