From 829e4d0b14e077b9a8dac2877483c261aa4bbe1a Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Tue, 31 Mar 2026 15:45:23 +0300 Subject: [PATCH 1/3] gh-141510: Support `frozendict` in `plistlib` (#145590) Co-authored-by: Victor Stinner --- Doc/library/plistlib.rst | 2 +- Doc/whatsnew/3.15.rst | 3 ++- Lib/plistlib.py | 16 ++++++++-------- Lib/test/test_plistlib.py | 19 +++++++++++++++++++ ...-03-21-16-03-16.gh-issue-141510.tKptA7.rst | 2 ++ 5 files changed, 32 insertions(+), 10 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2026-03-21-16-03-16.gh-issue-141510.tKptA7.rst diff --git a/Doc/library/plistlib.rst b/Doc/library/plistlib.rst index fa15cd4267eef4..72140e41675c35 100644 --- a/Doc/library/plistlib.rst +++ b/Doc/library/plistlib.rst @@ -18,7 +18,7 @@ and XML plist files. The property list (``.plist``) file format is a simple serialization supporting basic object types, like dictionaries, lists, numbers and strings. Usually the -top level object is a dictionary. +top level object is a dictionary or a frozen dictionary. To write out and to parse a plist file, use the :func:`dump` and :func:`load` functions. diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 462482c80122ea..97937892de3a6e 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -217,7 +217,8 @@ For example:: The following standard library modules have been updated to accept :class:`!frozendict`: :mod:`copy`, :mod:`decimal`, :mod:`json`, :mod:`marshal`, -:mod:`pickle`, :mod:`pprint` and :mod:`xml.etree.ElementTree`. +:mod:`plistlib` (only for serialization), :mod:`pickle`, :mod:`pprint` and +:mod:`xml.etree.ElementTree`. :func:`eval` and :func:`exec` accept :class:`!frozendict` for *globals*, and :func:`type` and :meth:`str.maketrans` accept :class:`!frozendict` for *dict*. 
diff --git a/Lib/plistlib.py b/Lib/plistlib.py index 01c7aa96261abe..93f3ef5e38af84 100644 --- a/Lib/plistlib.py +++ b/Lib/plistlib.py @@ -2,7 +2,7 @@ The property list (.plist) file format is a simple XML pickle supporting basic object types, like dictionaries, lists, numbers and strings. -Usually the top level object is a dictionary. +Usually the top level object is a dictionary or a frozen dictionary. To write out a plist file, use the dump(value, file) function. 'value' is the top level object, 'file' is @@ -357,7 +357,7 @@ def write_value(self, value): elif isinstance(value, float): self.simple_element("real", repr(value)) - elif isinstance(value, dict): + elif isinstance(value, (dict, frozendict)): self.write_dict(value) elif isinstance(value, (bytes, bytearray)): @@ -715,7 +715,7 @@ def _flatten(self, value): self._objidtable[id(value)] = refnum # And finally recurse into containers - if isinstance(value, dict): + if isinstance(value, (dict, frozendict)): keys = [] values = [] items = value.items() @@ -836,7 +836,7 @@ def _write_object(self, value): self._write_size(0xA0, s) self._fp.write(struct.pack('>' + self._ref_format * s, *refs)) - elif isinstance(value, dict): + elif isinstance(value, (dict, frozendict)): keyRefs, valRefs = [], [] if self._sort_keys: @@ -869,18 +869,18 @@ def _is_fmt_binary(header): # Generic bits # -_FORMATS={ - FMT_XML: dict( +_FORMATS=frozendict({ + FMT_XML: frozendict( detect=_is_fmt_xml, parser=_PlistParser, writer=_PlistWriter, ), - FMT_BINARY: dict( + FMT_BINARY: frozendict( detect=_is_fmt_binary, parser=_BinaryPlistParser, writer=_BinaryPlistWriter, ) -} +}) def load(fp, *, fmt=None, dict_type=dict, aware_datetime=False): diff --git a/Lib/test/test_plistlib.py b/Lib/test/test_plistlib.py index d9216be4d95658..b9c261310bb567 100644 --- a/Lib/test/test_plistlib.py +++ b/Lib/test/test_plistlib.py @@ -792,6 +792,25 @@ def test_dict_members(self): }) self.assertIsNot(pl2['first'], pl2['second']) + def test_frozendict(self): + pl = 
frozendict( + aString="Doodah", + anInt=728, + aDict=frozendict( + anotherString="hello", + aTrueValue=True, + ), + aList=["A", "B", 12], + ) + + for fmt in ALL_FORMATS: + with self.subTest(fmt=fmt): + data = plistlib.dumps(pl, fmt=fmt) + pl2 = plistlib.loads(data) + self.assertEqual(pl2, dict(pl)) + self.assertIsInstance(pl2, dict) + self.assertIsInstance(pl2['aDict'], dict) + def test_controlcharacters(self): for i in range(128): c = chr(i) diff --git a/Misc/NEWS.d/next/Library/2026-03-21-16-03-16.gh-issue-141510.tKptA7.rst b/Misc/NEWS.d/next/Library/2026-03-21-16-03-16.gh-issue-141510.tKptA7.rst new file mode 100644 index 00000000000000..19c30f11b33c70 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-03-21-16-03-16.gh-issue-141510.tKptA7.rst @@ -0,0 +1,2 @@ +Support :class:`frozendict` in :mod:`plistlib`, for serialization only. +Patch by Hugo van Kemenade. From db5936c5b89aa19e04d63120e0cf5bbc73bf2420 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Tue, 31 Mar 2026 16:17:49 +0300 Subject: [PATCH 2/3] gh-143050: Correct PyLong_FromString() to use _PyLong_Negate() (#145901) The long_from_string_base() might return a small integer, when the _pylong.py is used to do conversion. Hence, we must be careful here to not smash it "small int" bit by using the _PyLong_FlipSign(). 
Co-authored-by: Victor Stinner --- Include/internal/pycore_long.h | 18 +++++++++++++++++- Lib/test/test_capi/test_long.py | 10 ++++++++++ Modules/_testcapi/immortal.c | 4 ++-- Objects/longobject.c | 20 +++++--------------- 4 files changed, 34 insertions(+), 18 deletions(-) diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index 4386e8bcad8841..5ef9cc410e4ebe 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -232,6 +232,20 @@ _PyLong_IsPositive(const PyLongObject *op) return (op->long_value.lv_tag & SIGN_MASK) == 0; } +/* Return true if the argument is a small int */ +static inline bool +_PyLong_IsSmallInt(const PyLongObject *op) +{ + assert(PyLong_Check(op)); + bool is_small_int = (op->long_value.lv_tag & IMMORTALITY_BIT_MASK) != 0; + assert(PyLong_CheckExact(op) || (!is_small_int)); + assert(_Py_IsImmortal(op) || (!is_small_int)); + assert((_PyLong_IsCompact(op) + && _PY_IS_SMALL_INT(_PyLong_CompactValue(op))) + || (!is_small_int)); + return is_small_int; +} + static inline Py_ssize_t _PyLong_DigitCount(const PyLongObject *op) { @@ -293,7 +307,9 @@ _PyLong_SetDigitCount(PyLongObject *op, Py_ssize_t size) #define NON_SIZE_MASK ~(uintptr_t)((1 << NON_SIZE_BITS) - 1) static inline void -_PyLong_FlipSign(PyLongObject *op) { +_PyLong_FlipSign(PyLongObject *op) +{ + assert(!_PyLong_IsSmallInt(op)); unsigned int flipped_sign = 2 - (op->long_value.lv_tag & SIGN_MASK); op->long_value.lv_tag &= NON_SIZE_MASK; op->long_value.lv_tag |= flipped_sign; diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index d3156645eeec2d..fc0454b71cb780 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -803,6 +803,16 @@ def to_digits(num): self.assertEqual(pylongwriter_create(negative, digits), num, (negative, digits)) + def test_bug_143050(self): + with support.adjust_int_max_str_digits(0): + # Bug coming from using _pylong.int_from_string(), that + # currently requires 
> 6000 decimal digits. + int('-' + '0' * 7000, 10) + _testcapi.test_immortal_small_ints() + # Test also nonzero small int + int('-' + '0' * 7000 + '123', 10) + _testcapi.test_immortal_small_ints() + if __name__ == "__main__": unittest.main() diff --git a/Modules/_testcapi/immortal.c b/Modules/_testcapi/immortal.c index af510cab655356..1c87025594a48b 100644 --- a/Modules/_testcapi/immortal.c +++ b/Modules/_testcapi/immortal.c @@ -31,13 +31,13 @@ test_immortal_small_ints(PyObject *self, PyObject *Py_UNUSED(ignored)) for (int i = -5; i <= 1024; i++) { PyObject *obj = PyLong_FromLong(i); assert(verify_immortality(obj)); - int has_int_immortal_bit = ((PyLongObject *)obj)->long_value.lv_tag & IMMORTALITY_BIT_MASK; + int has_int_immortal_bit = _PyLong_IsSmallInt((PyLongObject *)obj); assert(has_int_immortal_bit); } for (int i = 1025; i <= 1030; i++) { PyObject *obj = PyLong_FromLong(i); assert(obj); - int has_int_immortal_bit = ((PyLongObject *)obj)->long_value.lv_tag & IMMORTALITY_BIT_MASK; + int has_int_immortal_bit = _PyLong_IsSmallInt((PyLongObject *)obj); assert(!has_int_immortal_bit); Py_DECREF(obj); } diff --git a/Objects/longobject.c b/Objects/longobject.c index 0d3ea9bc46c321..d416fc1747ecac 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -3119,11 +3119,11 @@ PyLong_FromString(const char *str, char **pend, int base) } /* Set sign and normalize */ - if (sign < 0) { - _PyLong_FlipSign(z); - } long_normalize(z); z = maybe_small_long(z); + if (sign < 0) { + _PyLong_Negate(&z); + } if (pend != NULL) { *pend = (char *)str; @@ -3623,21 +3623,11 @@ long_richcompare(PyObject *self, PyObject *other, int op) Py_RETURN_RICHCOMPARE(result, 0, op); } -static inline int -/// Return 1 if the object is one of the immortal small ints -_long_is_small_int(PyObject *op) -{ - PyLongObject *long_object = (PyLongObject *)op; - int is_small_int = (long_object->long_value.lv_tag & IMMORTALITY_BIT_MASK) != 0; - assert((!is_small_int) || PyLong_CheckExact(op)); - return 
is_small_int; -} - void _PyLong_ExactDealloc(PyObject *self) { assert(PyLong_CheckExact(self)); - if (_long_is_small_int(self)) { + if (_PyLong_IsSmallInt((PyLongObject *)self)) { // See PEP 683, section Accidental De-Immortalizing for details _Py_SetImmortal(self); return; @@ -3652,7 +3642,7 @@ _PyLong_ExactDealloc(PyObject *self) static void long_dealloc(PyObject *self) { - if (_long_is_small_int(self)) { + if (_PyLong_IsSmallInt((PyLongObject *)self)) { /* This should never get called, but we also don't want to SEGV if * we accidentally decref small Ints out of existence. Instead, * since small Ints are immortal, re-set the reference count. From 362145c20ebb08d2f850a49d356ecee858a281ae Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Tue, 31 Mar 2026 23:25:54 +0800 Subject: [PATCH 3/3] gh-139109: Document the trace recording interpreter in internaldocs (GH-146110) --- InternalDocs/jit.md | 59 ++++++++++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 17 deletions(-) diff --git a/InternalDocs/jit.md b/InternalDocs/jit.md index decfccad2d8d37..1345f2db8b34db 100644 --- a/InternalDocs/jit.md +++ b/InternalDocs/jit.md @@ -11,24 +11,54 @@ and this enables optimizations that span multiple instructions. Historically, the adaptive interpreter was referred to as `tier 1` and the JIT as `tier 2`. You will see remnants of this in the code. -## The Optimizer and Executors +## The Trace Recorder and Executors -The program begins running on the adaptive interpreter, until a `JUMP_BACKWARD` -instruction determines that it is "hot" because the counter in its +There are two interpreters in this section: + 1. Adaptive interpreter (the default behavior) + 2. 
Trace recording interpreter (enabled on JIT builds) + +The program begins running on the adaptive interpreter, until a `JUMP_BACKWARD` or +`RESUME` instruction determines that it is "hot" because the counter in its [inline cache](interpreter.md#inline-cache-entries) indicates that it executed more than some threshold number of times (see [`backoff_counter_triggers`](../Include/internal/pycore_backoff.h)). -It then calls the function `_PyOptimizer_Optimize()` in +It then calls the function `_PyJit_TryInitializeTracing` in [`Python/optimizer.c`](../Python/optimizer.c), passing it the current -[frame](frames.md) and instruction pointer. `_PyOptimizer_Optimize()` -constructs an object of type -[`_PyExecutorObject`](../Include/internal/pycore_optimizer.h) which implements -an optimized version of the instruction trace beginning at this jump. - -The optimizer determines where the trace ends, and the executor is set up +[frame](frames.md), instruction pointer and state. +The interpreter then switches into "tracing mode" via the macro +`ENTER_TRACING()`. On platforms that support computed goto and tail-calling +interpreters, the dispatch table is swapped out, while other platforms that do +not support either use a single flag in the opcode. +Execution between the normal interpreter and tracing interpreter are +interleaved via this dispatch mechanism. This means that while logically +there are two interpreters, the implementation appears to be a single +interpreter. + +During tracing mode, after each interpreter instruction's `DISPATCH()`, +the interpreter jumps to the `TRACE_RECORD` instruction. This instruction +records the previous instruction executed and also any live values of the next +operation it may require. It then translates the previous instruction to +a sequence of micro-ops using `_PyJit_translate_single_bytecode_to_trace`. 
+To ensure that the adaptive interpreter instructions +and cache entries are up-to-date, the trace recording interpreter always resets +the adaptive counters of adaptive instructions it sees. +This forces a re-specialization of any new instruction should an instruction +deoptimize, thus feeding the trace recorder up-to-date information. +Finally, the `TRACE_RECORD` instruction decides when to stop tracing +using various heuristics. + +Once trace recording concludes, `LEAVE_TRACING()` swaps back the dispatch +table (or unsets the opcode flag) set earlier by `ENTER_TRACING()`. +`stop_tracing_and_jit()` then calls `_PyOptimizer_Optimize()` which optimizes +the trace and constructs an +[`_PyExecutorObject`](../Include/internal/pycore_optimizer.h). + +JIT execution is set up to either return to the adaptive interpreter and resume execution, or transfer control to another executor (see `_PyExitData` in -Include/internal/pycore_optimizer.h). +Include/internal/pycore_optimizer.h). When resuming to the adaptive interpreter, +a "side exit", generated by an `EXIT_IF`, may trigger recording of another trace, +while a "deopt", generated by a `DEOPT_IF`, does not. The executor is stored on the [`code object`](code_objects.md) of the frame, in the `co_executors` field which is an array of executors. The start @@ -40,12 +70,7 @@ executor in `co_executors`. The micro-op (abbreviated `uop` to approximate `μop`) optimizer is defined in [`Python/optimizer.c`](../Python/optimizer.c) as `_PyOptimizer_Optimize`. -It translates an instruction trace into a sequence of micro-ops by replacing -each bytecode by an equivalent sequence of micro-ops (see -`_PyOpcode_macro_expansion` in -[pycore_opcode_metadata.h](../Include/internal/pycore_opcode_metadata.h) -which is generated from [`Python/bytecodes.c`](../Python/bytecodes.c)). 
-The micro-op sequence is then optimized by +It takes a micro-op sequence from the trace recorder and optimizes it with `_Py_uop_analyze_and_optimize` in [`Python/optimizer_analysis.c`](../Python/optimizer_analysis.c) and an instance of `_PyUOpExecutor_Type` is created to contain it.