From d873fb42f3535165f815c3c8e67d373a8d4cdfc5 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Wed, 17 Sep 2025 18:50:16 +0100 Subject: [PATCH 1/4] gh-137838: Move _PyUOpInstruction buffer to PyInterpreterState (gh-138918) --- Include/internal/pycore_interp_structs.h | 3 ++ Include/internal/pycore_optimizer.h | 30 +--------------- Include/internal/pycore_uop.h | 45 ++++++++++++++++++++++++ Makefile.pre.in | 1 + PCbuild/pythoncore.vcxproj.filters | 9 +++++ Python/optimizer.c | 18 ++++++++-- Python/pylifecycle.c | 1 + Python/pystate.c | 11 ++++++ 8 files changed, 87 insertions(+), 31 deletions(-) create mode 100644 Include/internal/pycore_uop.h diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index fa9568ab4d0e85..4c55770e01da4f 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -14,6 +14,7 @@ extern "C" { #include "pycore_structs.h" // PyHamtObject #include "pycore_tstate.h" // _PyThreadStateImpl #include "pycore_typedefs.h" // _PyRuntimeState +#include "pycore_uop.h" // struct _PyUOpInstruction #define CODE_MAX_WATCHERS 8 @@ -949,6 +950,8 @@ struct _is { struct callable_cache callable_cache; PyObject *common_consts[NUM_COMMON_CONSTANTS]; bool jit; + bool compiling; + struct _PyUOpInstruction *jit_uop_buffer; struct _PyExecutorObject *executor_list_head; struct _PyExecutorObject *executor_deletion_list_head; struct _PyExecutorObject *cold_executor; diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 94d01999f68d9d..c1a6b7abbf5de4 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -9,6 +9,7 @@ extern "C" { #endif #include "pycore_typedefs.h" // _PyInterpreterFrame +#include "pycore_uop.h" // _PyUOpInstruction #include "pycore_uop_ids.h" #include "pycore_stackref.h" // _PyStackRef #include @@ -41,32 +42,6 @@ typedef struct { PyCodeObject *code; // Weak (NULL if no corresponding 
ENTER_EXECUTOR). } _PyVMData; -/* Depending on the format, - * the 32 bits between the oparg and operand are: - * UOP_FORMAT_TARGET: - * uint32_t target; - * UOP_FORMAT_JUMP - * uint16_t jump_target; - * uint16_t error_target; - */ -typedef struct { - uint16_t opcode:15; - uint16_t format:1; - uint16_t oparg; - union { - uint32_t target; - struct { - uint16_t jump_target; - uint16_t error_target; - }; - }; - uint64_t operand0; // A cache entry - uint64_t operand1; -#ifdef Py_STATS - uint64_t execution_count; -#endif -} _PyUOpInstruction; - typedef struct _PyExitData { uint32_t target; uint16_t index; @@ -118,9 +93,6 @@ PyAPI_FUNC(void) _Py_Executors_InvalidateCold(PyInterpreterState *interp); // trace_run_counter is greater than this value. #define JIT_CLEANUP_THRESHOLD 100000 -// This is the length of the trace we project initially. -#define UOP_MAX_TRACE_LENGTH 1200 - #define TRACE_STACK_SIZE 5 int _Py_uop_analyze_and_optimize(_PyInterpreterFrame *frame, diff --git a/Include/internal/pycore_uop.h b/Include/internal/pycore_uop.h new file mode 100644 index 00000000000000..4abefd3b95d21a --- /dev/null +++ b/Include/internal/pycore_uop.h @@ -0,0 +1,45 @@ +#ifndef Py_CORE_UOP_H +#define Py_CORE_UOP_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include <stdint.h> +/* Depending on the format, + * the 32 bits between the oparg and operand are: + * UOP_FORMAT_TARGET: + * uint32_t target; + * UOP_FORMAT_JUMP + * uint16_t jump_target; + * uint16_t error_target; + */ +typedef struct _PyUOpInstruction{ + uint16_t opcode:15; + uint16_t format:1; + uint16_t oparg; + union { + uint32_t target; + struct { + uint16_t jump_target; + uint16_t error_target; + }; + }; + uint64_t operand0; // A cache entry + uint64_t operand1; +#ifdef Py_STATS + uint64_t execution_count; +#endif +} _PyUOpInstruction; + +// This is the length of the trace we project initially.
+#define UOP_MAX_TRACE_LENGTH 1200 +#define UOP_BUFFER_SIZE (UOP_MAX_TRACE_LENGTH * sizeof(_PyUOpInstruction)) + +#ifdef __cplusplus +} +#endif +#endif /* !Py_CORE_UOP_H */ diff --git a/Makefile.pre.in b/Makefile.pre.in index 8bc12bc1d46deb..a2a5f10585d27a 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1435,6 +1435,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_unicodeobject_generated.h \ $(srcdir)/Include/internal/pycore_unionobject.h \ $(srcdir)/Include/internal/pycore_uniqueid.h \ + $(srcdir)/Include/internal/pycore_uop.h \ $(srcdir)/Include/internal/pycore_uop_ids.h \ $(srcdir)/Include/internal/pycore_uop_metadata.h \ $(srcdir)/Include/internal/pycore_warnings.h \ diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index e9eedfd1312fae..1868b222f18534 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -882,6 +882,15 @@ Include\internal + + Include\internal + + + Include\internal + + + Include\internal + Include\internal\mimalloc diff --git a/Python/optimizer.c b/Python/optimizer.c index b82c790ffa9e69..6f27b521018c8c 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -116,7 +116,10 @@ _PyOptimizer_Optimize( _PyExecutorObject **executor_ptr, int chain_depth) { _PyStackRef *stack_pointer = frame->stackpointer; - assert(_PyInterpreterState_GET()->jit); + PyInterpreterState *interp = _PyInterpreterState_GET(); + assert(interp->jit); + assert(!interp->compiling); + interp->compiling = true; // The first executor in a chain and the MAX_CHAIN_DEPTH'th executor *must* // make progress in order to avoid infinite loops or excessively-long // side-exit chains.
We can only insert the executor into the bytecode if @@ -126,10 +129,12 @@ _PyOptimizer_Optimize( PyCodeObject *code = _PyFrame_GetCode(frame); assert(PyCode_Check(code)); if (progress_needed && !has_space_for_executor(code, start)) { + interp->compiling = false; return 0; } int err = uop_optimize(frame, start, executor_ptr, (int)(stack_pointer - _PyFrame_Stackbase(frame)), progress_needed); if (err <= 0) { + interp->compiling = false; return err; } assert(*executor_ptr != NULL); @@ -143,6 +148,7 @@ _PyOptimizer_Optimize( * it might get confused by the executor disappearing, * but there is not much we can do about that here. */ Py_DECREF(*executor_ptr); + interp->compiling = false; return 0; } insert_executor(code, start, index, *executor_ptr); @@ -152,6 +158,7 @@ _PyOptimizer_Optimize( } (*executor_ptr)->vm_data.chain_depth = chain_depth; assert((*executor_ptr)->vm_data.valid); + interp->compiling = false; return 1; } @@ -1280,7 +1287,14 @@ uop_optimize( { _PyBloomFilter dependencies; _Py_BloomFilter_Init(&dependencies); - _PyUOpInstruction buffer[UOP_MAX_TRACE_LENGTH]; + PyInterpreterState *interp = _PyInterpreterState_GET(); + if (interp->jit_uop_buffer == NULL) { + interp->jit_uop_buffer = (_PyUOpInstruction *)_PyObject_VirtualAlloc(UOP_BUFFER_SIZE); + if (interp->jit_uop_buffer == NULL) { + return 0; + } + } + _PyUOpInstruction *buffer = interp->jit_uop_buffer; OPT_STAT_INC(attempts); char *env_var = Py_GETENV("PYTHON_UOPS_OPTIMIZE"); bool is_noopt = true; diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 889f035b8d46d0..8920784a7b7eac 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1702,6 +1702,7 @@ finalize_modules(PyThreadState *tstate) // Invalidate all executors and turn off JIT: interp->jit = false; + interp->compiling = false; #ifdef _Py_TIER2 _Py_Executors_InvalidateAll(interp, 0); #endif diff --git a/Python/pystate.c b/Python/pystate.c index 133e0e34927627..f43989b0181b1a 100644 --- a/Python/pystate.c +++ 
b/Python/pystate.c @@ -22,6 +22,7 @@ #include "pycore_runtime_init.h" // _PyRuntimeState_INIT #include "pycore_stackref.h" // Py_STACKREF_DEBUG #include "pycore_time.h" // _PyTime_Init() +#include "pycore_uop.h" // UOP_BUFFER_SIZE #include "pycore_uniqueid.h" // _PyObject_FinalizePerThreadRefcounts() @@ -550,6 +551,11 @@ init_interpreter(PyInterpreterState *interp, #ifdef Py_GIL_DISABLED _Py_brc_init_state(interp); #endif + +#ifdef _Py_TIER2 + // No uop buffer yet; it is allocated lazily on first use. + interp->jit_uop_buffer = NULL; +#endif llist_init(&interp->mem_free_queue.head); llist_init(&interp->asyncio_tasks_head); interp->asyncio_tasks_lock = (PyMutex){0}; @@ -565,6 +571,7 @@ init_interpreter(PyInterpreterState *interp, } interp->_code_object_generation = 0; interp->jit = false; + interp->compiling = false; interp->executor_list_head = NULL; interp->executor_deletion_list_head = NULL; interp->executor_deletion_list_remaining_capacity = 0; @@ -797,6 +804,10 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) #ifdef _Py_TIER2 _Py_ClearExecutorDeletionList(interp); + if (interp->jit_uop_buffer != NULL) { + _PyObject_VirtualFree(interp->jit_uop_buffer, UOP_BUFFER_SIZE); + interp->jit_uop_buffer = NULL; + } #endif _PyAST_Fini(interp); _PyAtExit_Fini(interp); From b42af37ceddd05ffba6f561b1b534e93a4ad2505 Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Wed, 17 Sep 2025 13:25:24 -0700 Subject: [PATCH 2/4] GH-138355: Remove trash_delete_later from _gc_runtime_state (#138767) Remove trash_delete_later and trash_delete_nesting from _gc_runtime_state.
--- Include/cpython/pystate.h | 5 +++++ Include/internal/pycore_interp_structs.h | 6 ------ Objects/object.c | 4 ++-- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index 555641943423b4..bd9d8aaefe5400 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -157,6 +157,11 @@ struct _ts { */ unsigned long native_thread_id; + /* List of objects that still need to be cleaned up, singly linked + * via their gc headers' gc_next pointers. The list is populated by + * _PyTrash_thread_deposit_object and cleaned up by + * _PyTrash_thread_destroy_chain. + */ PyObject *delete_later; /* Tagged pointer to top-most critical section, or zero if there is no diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index 4c55770e01da4f..0e039de8ae05b3 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -203,12 +203,6 @@ enum _GCPhase { #define NUM_GENERATIONS 3 struct _gc_runtime_state { - /* List of objects that still need to be cleaned up, singly linked - * via their gc headers' gc_prev pointers. */ - PyObject *trash_delete_later; - /* Current call-stack depth of tp_dealloc calls. */ - int trash_delete_nesting; - /* Is automatic collection enabled? */ int enabled; int debug; diff --git a/Objects/object.c b/Objects/object.c index 1f10c2531fead1..0540112d7d2acf 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -3051,7 +3051,7 @@ Py_ReprLeave(PyObject *obj) /* Trashcan support. */ -/* Add op to the gcstate->trash_delete_later list. Called when the current +/* Add op to the tstate->delete_later list. Called when the current * call-stack depth gets large. op must be a gc'ed object, with refcount 0. * Py_DECREF must already have been called on it. 
*/ @@ -3077,7 +3077,7 @@ _PyTrash_thread_deposit_object(PyThreadState *tstate, PyObject *op) tstate->delete_later = op; } -/* Deallocate all the objects in the gcstate->trash_delete_later list. +/* Deallocate all the objects in the tstate->delete_later list. * Called when the call-stack unwinds again. */ void _PyTrash_thread_destroy_chain(PyThreadState *tstate) From 5a15e7378996358848394930343e9633b6fec8a9 Mon Sep 17 00:00:00 2001 From: Denis Sergeev Date: Thu, 18 Sep 2025 00:45:52 +0300 Subject: [PATCH 3/4] gh-138813: Fix mutable default kwargs={} in multiprocessing BaseProcess and DummyProcess to use None (GH-138814) * gh-138813: Default `BaseProcess` `kwargs` to `None` (#138814) Set `BaseProcess.__init__(..., kwargs=None)` and initialize `kwargs` with `dict(kwargs) if kwargs else {}`. This avoids a shared mutable default and matches threading.Thread behavior. Co-authored-by: Dmitrii Chuprov * DummyProcess kwargs=None (which threading.Thread accepts properly) Co-authored-by: Gregory P. 
Smith --- Lib/multiprocessing/dummy/__init__.py | 2 +- Lib/multiprocessing/process.py | 4 ++-- Lib/test/_test_multiprocessing.py | 17 +++++++++++++++++ ...25-09-17-08-32-43.gh-issue-138813.LHkHjX.rst | 1 + 4 files changed, 21 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-09-17-08-32-43.gh-issue-138813.LHkHjX.rst diff --git a/Lib/multiprocessing/dummy/__init__.py b/Lib/multiprocessing/dummy/__init__.py index 6a1468609e347b..7dc5d1c8dde848 100644 --- a/Lib/multiprocessing/dummy/__init__.py +++ b/Lib/multiprocessing/dummy/__init__.py @@ -33,7 +33,7 @@ class DummyProcess(threading.Thread): - def __init__(self, group=None, target=None, name=None, args=(), kwargs={}): + def __init__(self, group=None, target=None, name=None, args=(), kwargs=None): threading.Thread.__init__(self, group, target, name, args, kwargs) self._pid = None self._children = weakref.WeakKeyDictionary() diff --git a/Lib/multiprocessing/process.py b/Lib/multiprocessing/process.py index 9db322be1aa6d6..262513f295fde5 100644 --- a/Lib/multiprocessing/process.py +++ b/Lib/multiprocessing/process.py @@ -77,7 +77,7 @@ class BaseProcess(object): def _Popen(self): raise NotImplementedError - def __init__(self, group=None, target=None, name=None, args=(), kwargs={}, + def __init__(self, group=None, target=None, name=None, args=(), kwargs=None, *, daemon=None): assert group is None, 'group argument must be None for now' count = next(_process_counter) @@ -89,7 +89,7 @@ def __init__(self, group=None, target=None, name=None, args=(), kwargs={}, self._closed = False self._target = target self._args = tuple(args) - self._kwargs = dict(kwargs) + self._kwargs = dict(kwargs) if kwargs else {} self._name = name or type(self).__name__ + '-' + \ ':'.join(str(i) for i in self._identity) if daemon is not None: diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index d9e572961152b3..850744e47d0e0b 100644 --- a/Lib/test/_test_multiprocessing.py +++ 
b/Lib/test/_test_multiprocessing.py @@ -5278,6 +5278,23 @@ def test_invalid_handles(self): multiprocessing.connection.Connection, -1) +# +# Regression tests for BaseProcess kwargs handling +# + +class TestBaseProcessKwargs(unittest.TestCase): + def test_default_kwargs_not_shared_between_instances(self): + # Creating multiple Process instances without passing kwargs + # must create independent empty dicts (no shared state). + p1 = multiprocessing.Process(target=lambda: None) + p2 = multiprocessing.Process(target=lambda: None) + self.assertIsInstance(p1._kwargs, dict) + self.assertIsInstance(p2._kwargs, dict) + self.assertIsNot(p1._kwargs, p2._kwargs) + # Mutating one should not affect the other + p1._kwargs['x'] = 1 + self.assertNotIn('x', p2._kwargs) + @hashlib_helper.requires_hashdigest('sha256') class OtherTest(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2025-09-17-08-32-43.gh-issue-138813.LHkHjX.rst b/Misc/NEWS.d/next/Library/2025-09-17-08-32-43.gh-issue-138813.LHkHjX.rst new file mode 100644 index 00000000000000..97f4d76bb2f454 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-09-17-08-32-43.gh-issue-138813.LHkHjX.rst @@ -0,0 +1 @@ +:class:`!multiprocessing.BaseProcess` defaults ``kwargs`` to ``None`` instead of a shared dictionary. 
From b485e50fde3be08d796a2dac66cb822da1226bb3 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 17 Sep 2025 16:50:15 -0500 Subject: [PATCH 4/4] gh-139074: Fix missing high precision case in sumprod() (gh-139075) --- Lib/test/test_math.py | 4 ++- ...-09-17-13-21-26.gh-issue-139074.dVZO5F.rst | 2 ++ Modules/mathmodule.c | 29 +++++++++---------- 3 files changed, 19 insertions(+), 16 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-09-17-13-21-26.gh-issue-139074.dVZO5F.rst diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py index e3b0d4fa9eeeb3..92326a46c33963 100644 --- a/Lib/test/test_math.py +++ b/Lib/test/test_math.py @@ -1485,7 +1485,9 @@ def __rmul__(self, other): # Error cases that arose during development args = ((-5, -5, 10), (1.5, 4611686018427387904, 2305843009213693952)) - self.assertEqual(sumprod(*args), 0.0) + self.assertEqual(sumprod(*args), -7.5) + self.assertEqual(sumprod([-0.01, 1, -1, 0.01], [1, 1, 1, 1]), 0.0) + self.assertEqual(sumprod([1, 1, 1, 1], [-0.01, 1, -1, 0.01], ), 0.0) @requires_IEEE_754 @unittest.skipIf(HAVE_DOUBLE_ROUNDING, diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-09-17-13-21-26.gh-issue-139074.dVZO5F.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-09-17-13-21-26.gh-issue-139074.dVZO5F.rst new file mode 100644 index 00000000000000..56c9f21296d3ac --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-09-17-13-21-26.gh-issue-139074.dVZO5F.rst @@ -0,0 +1,2 @@ +Fixed a missing case in :func:`math.sumprod` where a low precision path was +taken when an int/int input pair followed a float input. 
diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index c631beb9ce5477..d8bf9e76cdd082 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2937,32 +2937,31 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) if (!finished) { double flt_p, flt_q; - bool p_type_float = PyFloat_CheckExact(p_i); - bool q_type_float = PyFloat_CheckExact(q_i); - if (p_type_float && q_type_float) { - flt_p = PyFloat_AS_DOUBLE(p_i); - flt_q = PyFloat_AS_DOUBLE(q_i); - } else if (p_type_float && (PyLong_CheckExact(q_i) || PyBool_Check(q_i))) { - /* We care about float/int pairs and int/float pairs because - they arise naturally in several use cases such as price - times quantity, measurements with integer weights, or - data selected by a vector of bools. */ + + if (PyFloat_CheckExact(p_i)) { flt_p = PyFloat_AS_DOUBLE(p_i); - flt_q = PyLong_AsDouble(q_i); - if (flt_q == -1.0 && PyErr_Occurred()) { + } else if (PyLong_CheckExact(p_i) || PyBool_Check(p_i)) { + flt_p = PyLong_AsDouble(p_i); + if (flt_p == -1.0 && PyErr_Occurred()) { PyErr_Clear(); goto finalize_flt_path; } - } else if (q_type_float && (PyLong_CheckExact(p_i) || PyBool_Check(p_i))) { + } else { + goto finalize_flt_path; + } + + if (PyFloat_CheckExact(q_i)) { flt_q = PyFloat_AS_DOUBLE(q_i); - flt_p = PyLong_AsDouble(p_i); - if (flt_p == -1.0 && PyErr_Occurred()) { + } else if (PyLong_CheckExact(q_i) || PyBool_Check(q_i)) { + flt_q = PyLong_AsDouble(q_i); + if (flt_q == -1.0 && PyErr_Occurred()) { PyErr_Clear(); goto finalize_flt_path; } } else { goto finalize_flt_path; } + TripleLength new_flt_total = tl_fma(flt_p, flt_q, flt_total); if (isfinite(new_flt_total.hi)) { flt_total = new_flt_total;