diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h
index 3fe1fdaa1589b6..9ab3c89d78c785 100644
--- a/Include/internal/pycore_interp_structs.h
+++ b/Include/internal/pycore_interp_structs.h
@@ -943,7 +943,7 @@ struct _is {
     struct types_state types;
     struct callable_cache callable_cache;
    PyObject *common_consts[NUM_COMMON_CONSTANTS];
-    bool jit;
+    uint8_t jit;
     bool compiling;
     struct _PyExecutorObject *executor_list_head;
     struct _PyExecutorObject *executor_deletion_list_head;
diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h
index 6a0fc1a59e7965..808cbe49f52385 100644
--- a/Include/internal/pycore_optimizer.h
+++ b/Include/internal/pycore_optimizer.h
@@ -47,6 +47,8 @@ typedef struct _PyExitData {
 typedef struct _PyExecutorObject {
     PyObject_VAR_HEAD
     const _PyUOpInstruction *trace;
+    // The interpreter this executor belongs to.
+    PyInterpreterState *interp;
     _PyVMData vm_data; /* Used by the VM, but opaque to the optimizer */
     uint32_t exit_count;
     uint32_t code_size;
@@ -356,8 +358,6 @@ _PyJit_TryInitializeTracing(PyThreadState *tstate, _PyInterpreterFrame *frame,
 void
 _PyJit_FinalizeTracing(PyThreadState *tstate);
 
-void
-_PyJit_Tracer_InvalidateDependency(PyThreadState *old_tstate, void *obj);
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py
index ea1606fd5b5f05..19c8f4e244a97b 100644
--- a/Lib/test/test_capi/test_opt.py
+++ b/Lib/test/test_capi/test_opt.py
@@ -9,9 +9,13 @@
 
 import _opcode
 
-from test.support import (script_helper, requires_specialization,
-                          import_helper, Py_GIL_DISABLED, requires_jit_enabled,
-                          reset_code)
+from test.support import (
+    script_helper,
+    import_helper,
+    Py_GIL_DISABLED,
+    requires_jit_enabled,
+    reset_code
+)
 
 _testinternalcapi = import_helper.import_module("_testinternalcapi")
 
@@ -61,8 +65,6 @@ def get_opnames(ex):
     return list(iter_opnames(ex))
 
 
-@requires_specialization
-@unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds")
 @requires_jit_enabled
 class TestExecutorInvalidation(unittest.TestCase):
 
@@ -130,8 +132,6 @@ def f():
         self.assertIsNone(exe)
 
 
-@requires_specialization
-@unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds")
 @requires_jit_enabled
 @unittest.skipIf(os.getenv("PYTHON_UOPS_OPTIMIZE") == "0", "Needs uop optimizer to run.")
 class TestUops(unittest.TestCase):
@@ -434,8 +434,6 @@ def testfunc(n, m):
         self.assertIn("_FOR_ITER_TIER_TWO", uops)
 
 
-@requires_specialization
-@unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds")
 @requires_jit_enabled
 @unittest.skipIf(os.getenv("PYTHON_UOPS_OPTIMIZE") == "0", "Needs uop optimizer to run.")
 class TestUopsOptimization(unittest.TestCase):
@@ -2138,6 +2136,7 @@ def testfunc(n):
         self.assertNotIn("_GUARD_TOS_INT", uops)
         self.assertIn("_POP_TOP_NOP", uops)
 
+    @unittest.skipIf(Py_GIL_DISABLED, "FT build immortalizes constants")
     def test_call_len_known_length_small_int(self):
         # Make sure that len(t) is optimized for a tuple of length 5.
         # See https://github.com/python/cpython/issues/139393.
@@ -2162,6 +2161,7 @@ def testfunc(n):
         self.assertNotIn("_POP_CALL_LOAD_CONST_INLINE_BORROW", uops)
         self.assertNotIn("_POP_TOP_LOAD_CONST_INLINE_BORROW", uops)
 
+    @unittest.skipIf(Py_GIL_DISABLED, "FT build immortalizes constants")
     def test_call_len_known_length(self):
         # Make sure that len(t) is not optimized for a tuple of length 2048.
         # See https://github.com/python/cpython/issues/139393.
@@ -2848,6 +2848,7 @@ def testfunc(n):
 
         self.assertIn("_POP_TOP_NOP", uops)
 
+    @unittest.skipIf(Py_GIL_DISABLED, "FT might immortalize this.")
     def test_pop_top_specialize_int(self):
         def testfunc(n):
             for _ in range(n):
@@ -2861,6 +2862,7 @@ def testfunc(n):
 
         self.assertIn("_POP_TOP_INT", uops)
 
+    @unittest.skipIf(Py_GIL_DISABLED, "FT might immortalize this.")
     def test_pop_top_specialize_float(self):
         def testfunc(n):
             for _ in range(n):
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-15-16-30-46.gh-issue-141594.PSsC5J.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-15-16-30-46.gh-issue-141594.PSsC5J.rst
new file mode 100644
index 00000000000000..5db6330ca540b0
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-15-16-30-46.gh-issue-141594.PSsC5J.rst
@@ -0,0 +1,3 @@
+Add free-threading support to the JIT. On free-threaded builds the JIT is
+only enabled while a single thread is running, and it is disabled when
+additional threads are spawned. Patch by Ken Jin.
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index a7fbb0f87b6e9c..3afd7c630cd140 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -1240,8 +1240,7 @@ add_executor_dependency(PyObject *self, PyObject *args)
 static PyObject *
 invalidate_executors(PyObject *self, PyObject *obj)
 {
-    PyInterpreterState *interp = PyInterpreterState_Get();
-    _Py_Executors_InvalidateDependency(interp, obj, 1);
+    _Py_Executors_InvalidateDependency(_PyInterpreterState_GET(), obj, 1);
     Py_RETURN_NONE;
 }
 
diff --git a/Objects/codeobject.c b/Objects/codeobject.c
index 3aea2038fd17e7..b61d86d110fb1b 100644
--- a/Objects/codeobject.c
+++ b/Objects/codeobject.c
@@ -2432,7 +2432,7 @@ code_dealloc(PyObject *self)
         PyMem_Free(co_extra);
     }
 #ifdef _Py_TIER2
-    _PyJit_Tracer_InvalidateDependency(tstate, self);
+    _Py_Executors_InvalidateDependency(tstate->interp, self, 1);
     if (co->co_executors != NULL) {
         clear_executors(co);
     }
@@ -3363,8 +3363,12 @@ deopt_code_unit(PyCodeObject *code, int i)
         inst.op.code = _PyOpcode_Deopt[opcode];
         assert(inst.op.code < MIN_SPECIALIZED_OPCODE);
     }
-    // JIT should not be enabled with free-threading
-    assert(inst.op.code != ENTER_EXECUTOR);
+    if (inst.op.code == ENTER_EXECUTOR) {
+        _PyExecutorObject *exec = code->co_executors->executors[inst.op.arg];
+        assert(exec != NULL);
+        inst.op.code = exec->vm_data.opcode;
+        inst.op.arg = exec->vm_data.oparg;
+    }
     return inst;
 }
 
diff --git a/Objects/frameobject.c b/Objects/frameobject.c
index 3c0b454503be66..086ac5b1d6a178 100644
--- a/Objects/frameobject.c
+++ b/Objects/frameobject.c
@@ -262,7 +262,6 @@ framelocalsproxy_setitem(PyObject *self, PyObject *key, PyObject *value)
 
 #if _Py_TIER2
             _Py_Executors_InvalidateDependency(_PyInterpreterState_GET(), co, 1);
-            _PyJit_Tracer_InvalidateDependency(_PyThreadState_GET(), co);
 #endif
 
             _PyLocals_Kind kind = _PyLocals_GetKind(co->co_localspluskinds, i);
diff --git a/Objects/funcobject.c b/Objects/funcobject.c
index b659ac8023373b..e11db261bc23f3 100644
--- a/Objects/funcobject.c
+++ b/Objects/funcobject.c
@@ -11,7 +11,7 @@
 #include "pycore_setobject.h"     // _PySet_NextEntry()
 #include "pycore_stats.h"
 #include "pycore_weakref.h"       // FT_CLEAR_WEAKREFS()
-#include "pycore_optimizer.h"     // _PyJit_Tracer_InvalidateDependency
+#include "pycore_optimizer.h"     // _Py_Executors_InvalidateDependency
 
 static const char *
 func_event_name(PyFunction_WatchEvent event) {
@@ -298,7 +298,7 @@ functions is running.
 */
 
-#ifndef Py_GIL_DISABLED
+#if _Py_TIER2
 static inline struct _func_version_cache_item *
 get_cache_item(PyInterpreterState *interp, uint32_t version)
 {
@@ -315,11 +315,13 @@ _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version)
     // This should only be called from MAKE_FUNCTION. No code is specialized
     // based on the version, so we do not need to stop the world to set it.
     func->func_version = version;
-#ifndef Py_GIL_DISABLED
+#if _Py_TIER2
     PyInterpreterState *interp = _PyInterpreterState_GET();
+    FT_MUTEX_LOCK(&interp->func_state.mutex);
     struct _func_version_cache_item *slot = get_cache_item(interp, version);
     slot->func = func;
     slot->code = func->func_code;
+    FT_MUTEX_UNLOCK(&interp->func_state.mutex);
 #endif
 }
 
@@ -330,13 +332,15 @@ func_clear_version(PyInterpreterState *interp, PyFunctionObject *func)
         // Version was never set or has already been cleared.
         return;
     }
-#ifndef Py_GIL_DISABLED
+#if _Py_TIER2
+    FT_MUTEX_LOCK(&interp->func_state.mutex);
     struct _func_version_cache_item *slot =
         get_cache_item(interp, func->func_version);
     if (slot->func == func) {
         slot->func = NULL;
         // Leave slot->code alone, there may be use for it.
     }
+    FT_MUTEX_UNLOCK(&interp->func_state.mutex);
 #endif
     func->func_version = FUNC_VERSION_CLEARED;
 }
@@ -358,8 +362,9 @@ _PyFunction_ClearVersion(PyFunctionObject *func)
 void
 _PyFunction_ClearCodeByVersion(uint32_t version)
 {
-#ifndef Py_GIL_DISABLED
+#if _Py_TIER2
     PyInterpreterState *interp = _PyInterpreterState_GET();
+    FT_MUTEX_LOCK(&interp->func_state.mutex);
     struct _func_version_cache_item *slot = get_cache_item(interp, version);
     if (slot->code) {
         assert(PyCode_Check(slot->code));
@@ -369,15 +374,17 @@ _PyFunction_ClearCodeByVersion(uint32_t version)
             slot->func = NULL;
         }
     }
+    FT_MUTEX_UNLOCK(&interp->func_state.mutex);
 #endif
 }
 
 PyFunctionObject *
 _PyFunction_LookupByVersion(uint32_t version, PyObject **p_code)
 {
-#ifdef Py_GIL_DISABLED
-    return NULL;
-#else
+#if _Py_TIER2
+    // This function does not need locking/atomics as it can only be
+    // called from the optimizer, which is currently disabled
+    // when there are multiple threads.
     PyInterpreterState *interp = _PyInterpreterState_GET();
     struct _func_version_cache_item *slot = get_cache_item(interp, version);
     if (slot->code) {
@@ -395,12 +402,18 @@ _PyFunction_LookupByVersion(uint32_t version, PyObject **p_code)
         return slot->func;
     }
     return NULL;
+#else
+    return NULL;
 #endif
 }
 
 uint32_t
 _PyFunction_GetVersionForCurrentState(PyFunctionObject *func)
 {
+    // This function does not need locking/atomics as it can only be
+    // called from the specializing interpreter or optimizer.
+    // The specializing interpreter holds a strong reference to the function.
+    // The optimizer is currently disabled when there are multiple threads.
     return func->func_version;
 }
 
@@ -1153,7 +1166,6 @@ func_dealloc(PyObject *self)
     }
 #if _Py_TIER2
     _Py_Executors_InvalidateDependency(_PyInterpreterState_GET(), self, 1);
-    _PyJit_Tracer_InvalidateDependency(_PyThreadState_GET(), self);
 #endif
     _PyObject_GC_UNTRACK(op);
     FT_CLEAR_WEAKREFS(self, op->func_weakreflist);
diff --git a/Objects/listobject.c b/Objects/listobject.c
index 4a98c8e54ab03f..20092e122cafc1 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -79,7 +79,9 @@ ensure_shared_on_resize(PyListObject *self)
     // We can't use _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED here because
     // the `CALL_LIST_APPEND` bytecode handler may lock the list without
     // a critical section.
-    assert(Py_REFCNT(self) == 1 || PyMutex_IsLocked(&_PyObject_CAST(self)->ob_mutex));
+    assert(Py_REFCNT(self) == 1 ||
+           (_Py_IsOwnedByCurrentThread((PyObject *)self) && !_PyObject_GC_IS_SHARED(self)) ||
+           PyMutex_IsLocked(&_PyObject_CAST(self)->ob_mutex));
 
     // Ensure that the list array is freed using QSBR if we are not the
     // owning thread.
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 7f5149aeece12b..43b45689eaa477 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -1148,8 +1148,9 @@ static void
 set_version_unlocked(PyTypeObject *tp, unsigned int version)
 {
     assert(version == 0 || (tp->tp_versions_used != _Py_ATTR_CACHE_UNUSED));
-#ifndef Py_GIL_DISABLED
+#if _Py_TIER2
     PyInterpreterState *interp = _PyInterpreterState_GET();
+    BEGIN_TYPE_LOCK();
     // lookup the old version and set to null
     if (tp->tp_version_tag != 0) {
         PyTypeObject **slot =
@@ -1157,6 +1158,8 @@ set_version_unlocked(PyTypeObject *tp, unsigned int version)
             + (tp->tp_version_tag % TYPE_VERSION_CACHE_SIZE);
         *slot = NULL;
     }
+#endif
+#ifndef Py_GIL_DISABLED
     if (version) {
         tp->tp_versions_used++;
     }
@@ -1166,13 +1169,14 @@ set_version_unlocked(PyTypeObject *tp, unsigned int version)
     }
 #endif
     FT_ATOMIC_STORE_UINT_RELAXED(tp->tp_version_tag, version);
-#ifndef Py_GIL_DISABLED
+#if _Py_TIER2
     if (version != 0) {
         PyTypeObject **slot =
             interp->types.type_version_cache
             + (version % TYPE_VERSION_CACHE_SIZE);
         *slot = tp;
     }
+    END_TYPE_LOCK();
 #endif
 }
 
@@ -1357,9 +1361,12 @@ _PyType_SetVersion(PyTypeObject *tp, unsigned int version)
 PyTypeObject *
 _PyType_LookupByVersion(unsigned int version)
 {
-#ifdef Py_GIL_DISABLED
+#ifndef _Py_TIER2
     return NULL;
 #else
+    // This function does not need locking/atomics as it can only be
+    // called from the optimizer, which is currently disabled
+    // when there are multiple threads.
     PyInterpreterState *interp = _PyInterpreterState_GET();
     PyTypeObject **slot =
         interp->types.type_version_cache
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index ea09c0645aa39c..04b743ac6829fd 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -2937,9 +2937,9 @@ dummy_func(
         };
 
         specializing tier1 op(_SPECIALIZE_JUMP_BACKWARD, (--)) {
-            #if ENABLE_SPECIALIZATION
+            #if ENABLE_SPECIALIZATION_FT
            if (this_instr->op.code == JUMP_BACKWARD) {
-                uint8_t desired = tstate->interp->jit ? JUMP_BACKWARD_JIT : JUMP_BACKWARD_NO_JIT;
+                uint8_t desired = FT_ATOMIC_LOAD_UINT8(tstate->interp->jit) ? JUMP_BACKWARD_JIT : JUMP_BACKWARD_NO_JIT;
                 FT_ATOMIC_STORE_UINT8_RELAXED(this_instr->op.code, desired);
                 // Need to re-dispatch so the warmup counter isn't off by one:
                 next_instr = this_instr;
@@ -3282,11 +3282,9 @@ dummy_func(
 
         // Only used by Tier 2
         op(_GUARD_NOT_EXHAUSTED_LIST, (iter, null_or_index -- iter, null_or_index)) {
-#ifndef Py_GIL_DISABLED
             PyObject *list_o = PyStackRef_AsPyObjectBorrow(iter);
             assert(Py_TYPE(list_o) == &PyList_Type);
             EXIT_IF((size_t)PyStackRef_UntagInt(null_or_index) >= (size_t)PyList_GET_SIZE(list_o));
-#endif
         }
 
         replaced op(_ITER_NEXT_LIST, (iter, null_or_index -- iter, null_or_index, next)) {
@@ -5203,6 +5201,19 @@ dummy_func(
         }
 
         tier2 op(_CHECK_VALIDITY, (--)) {
+            // For FT:
+            // This doesn't need atomics (for now) as there is only one point
+            // where a write from another thread is possible:
+            // when a new thread is spawned and it invalidates all current
+            // executors.
+            // The new thread can only be created by an executing uop prior to the
+            // _CHECK_VALIDITY check. New thread creation is synchronized by
+            // locking of the runtime, and the current thread is naturally
+            // paused/waiting for the new thread to be created. Thus,
+            // there is a strict happens-before relation between that
+            // uop's invalidation of validity and this check.
+            // So for now, while the JIT does not run on multiple threads,
+            // it is safe for this to be non-atomic.
             DEOPT_IF(!current_executor->vm_data.valid);
         }
 
diff --git a/Python/ceval.c b/Python/ceval.c
index 924afaa97443cb..040ffbd8fbb378 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -1719,7 +1719,7 @@ _PyTier2Interpreter(
     for (;;) {
         uopcode = next_uop->opcode;
 #ifdef Py_DEBUG
-        if (frame->lltrace >= 3) {
+        if (frame->lltrace >= 4) {
             dump_stack(frame, stack_pointer);
             printf("    cache=[");
             dump_cache_item(_tos_cache0, 0, current_cached_values);
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h
index 4e8fa34c0b2c0d..ea9fb0b20ac347 100644
--- a/Python/executor_cases.c.h
+++ b/Python/executor_cases.c.h
@@ -10322,7 +10322,6 @@
             _PyStackRef iter;
             null_or_index = stack_pointer[-1];
             iter = stack_pointer[-2];
-            #ifndef Py_GIL_DISABLED
             PyObject *list_o = PyStackRef_AsPyObjectBorrow(iter);
             assert(Py_TYPE(list_o) == &PyList_Type);
             if ((size_t)PyStackRef_UntagInt(null_or_index) >= (size_t)PyList_GET_SIZE(list_o)) {
@@ -10330,7 +10329,6 @@
                 SET_CURRENT_CACHED_VALUES(0);
                 JUMP_TO_JUMP_TARGET();
             }
-            #endif
             _tos_cache1 = null_or_index;
             _tos_cache0 = iter;
             SET_CURRENT_CACHED_VALUES(2);
@@ -10348,7 +10346,6 @@
             _PyStackRef _stack_item_0 = _tos_cache0;
             null_or_index = _stack_item_0;
             iter = stack_pointer[-1];
-            #ifndef Py_GIL_DISABLED
             PyObject *list_o = PyStackRef_AsPyObjectBorrow(iter);
             assert(Py_TYPE(list_o) == &PyList_Type);
             if ((size_t)PyStackRef_UntagInt(null_or_index) >= (size_t)PyList_GET_SIZE(list_o)) {
@@ -10357,7 +10354,6 @@
                 SET_CURRENT_CACHED_VALUES(1);
                 JUMP_TO_JUMP_TARGET();
             }
-            #endif
             _tos_cache1 = null_or_index;
             _tos_cache0 = iter;
             SET_CURRENT_CACHED_VALUES(2);
@@ -10376,7 +10372,6 @@
             _PyStackRef _stack_item_1 = _tos_cache1;
             null_or_index = _stack_item_1;
             iter = _stack_item_0;
-            #ifndef Py_GIL_DISABLED
             PyObject *list_o = PyStackRef_AsPyObjectBorrow(iter);
             assert(Py_TYPE(list_o) == &PyList_Type);
             if ((size_t)PyStackRef_UntagInt(null_or_index) >= (size_t)PyList_GET_SIZE(list_o)) {
@@ -10386,7 +10381,6 @@
                 SET_CURRENT_CACHED_VALUES(2);
                 JUMP_TO_JUMP_TARGET();
             }
-            #endif
             _tos_cache1 = null_or_index;
             _tos_cache0 = iter;
             SET_CURRENT_CACHED_VALUES(2);
@@ -10404,7 +10398,6 @@
             _PyStackRef _stack_item_2 = _tos_cache2;
             null_or_index = _stack_item_2;
             iter = _stack_item_1;
-            #ifndef Py_GIL_DISABLED
             PyObject *list_o = PyStackRef_AsPyObjectBorrow(iter);
             assert(Py_TYPE(list_o) == &PyList_Type);
             if ((size_t)PyStackRef_UntagInt(null_or_index) >= (size_t)PyList_GET_SIZE(list_o)) {
@@ -10415,7 +10408,6 @@
                 SET_CURRENT_CACHED_VALUES(3);
                 JUMP_TO_JUMP_TARGET();
             }
-            #endif
             _tos_cache2 = null_or_index;
             _tos_cache1 = iter;
             _tos_cache0 = _stack_item_0;
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index e63852aee1134c..62eb10984cb239 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -7247,9 +7247,9 @@
             /* Skip 1 cache entry */
             // _SPECIALIZE_JUMP_BACKWARD
             {
-                #if ENABLE_SPECIALIZATION
+                #if ENABLE_SPECIALIZATION_FT
                 if (this_instr->op.code == JUMP_BACKWARD) {
-                    uint8_t desired = tstate->interp->jit ? JUMP_BACKWARD_JIT : JUMP_BACKWARD_NO_JIT;
+                    uint8_t desired = FT_ATOMIC_LOAD_UINT8(tstate->interp->jit) ? JUMP_BACKWARD_JIT : JUMP_BACKWARD_NO_JIT;
                     FT_ATOMIC_STORE_UINT8_RELAXED(this_instr->op.code, desired);
                     next_instr = this_instr;
                     DISPATCH_SAME_OPARG();
diff --git a/Python/instrumentation.c b/Python/instrumentation.c
index 28bbe1d82a3b88..632b17632ef0ed 100644
--- a/Python/instrumentation.c
+++ b/Python/instrumentation.c
@@ -1785,7 +1785,6 @@ force_instrument_lock_held(PyCodeObject *code, PyInterpreterState *interp)
         _PyCode_Clear_Executors(code);
     }
     _Py_Executors_InvalidateDependency(interp, code, 1);
-    _PyJit_Tracer_InvalidateDependency(PyThreadState_GET(), code);
 #endif
     int code_len = (int)Py_SIZE(code);
     /* Exit early to avoid creating instrumentation
diff --git a/Python/optimizer.c b/Python/optimizer.c
index b497ac629960ac..c25f6c070977eb 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -140,7 +140,6 @@ _PyOptimizer_Optimize(
     }
     assert(!interp->compiling);
     assert(_tstate->jit_tracer_state.initial_state.stack_depth >= 0);
-#ifndef Py_GIL_DISABLED
     assert(_tstate->jit_tracer_state.initial_state.func != NULL);
     interp->compiling = true;
     // The first executor in a chain and the MAX_CHAIN_DEPTH'th executor *must*
@@ -198,9 +197,6 @@ _PyOptimizer_Optimize(
     }
     interp->compiling = false;
     return 1;
-#else
-    return 0;
-#endif
 }
 
 static _PyExecutorObject *
@@ -473,7 +469,11 @@ static PyMethodDef uop_executor_methods[] = {
 static int
 executor_is_gc(PyObject *o)
 {
+#ifdef Py_GIL_DISABLED
+    return 1;
+#else
     return !_Py_IsImmortal(o);
+#endif
 }
 
 PyTypeObject _PyUOpExecutor_Type = {
@@ -1647,8 +1647,11 @@ unlink_executor(_PyExecutorObject *executor)
         prev->vm_data.links.next = next;
     }
     else {
-        // prev == NULL implies that executor is the list head
-        PyInterpreterState *interp = PyInterpreterState_Get();
+        // prev == NULL often implies that executor is the list head
+        // Note that we should *not* get the current interpreter, as
+        // that may not always correspond to the interpreter this executor
+        // belongs to.
+        PyInterpreterState *interp = executor->interp;
         assert(interp->executor_list_head == executor);
         interp->executor_list_head = next;
     }
@@ -1663,6 +1666,7 @@ _Py_ExecutorInit(_PyExecutorObject *executor, const _PyBloomFilter *dependency_s
     for (int i = 0; i < _Py_BLOOM_FILTER_WORDS; i++) {
         executor->vm_data.bloom.bits[i] = dependency_set->bits[i];
     }
+    executor->interp = _PyInterpreterState_GET();
     link_executor(executor);
 }
 
@@ -1777,12 +1781,20 @@ _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj)
     _Py_BloomFilter_Add(&executor->vm_data.bloom, obj);
 }
 
+static void jit_tracer_invalidate_dependency(PyThreadState *tstate, void *obj);
+
 /* Invalidate all executors that depend on `obj`
  * May cause other executors to be invalidated as well */
 void
 _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation)
 {
+
+    // It doesn't matter if we don't invalidate all threads.
+    // If more threads are spawned, we force the JIT not to compile anyway,
+    // so the trace gets abandoned.
+    jit_tracer_invalidate_dependency(_PyThreadState_GET(), obj);
+
     _PyBloomFilter obj_filter;
     _Py_BloomFilter_Init(&obj_filter);
     _Py_BloomFilter_Add(&obj_filter, obj);
@@ -1820,8 +1832,8 @@ _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is
         _Py_Executors_InvalidateAll(interp, is_invalidation);
 }
 
-void
-_PyJit_Tracer_InvalidateDependency(PyThreadState *tstate, void *obj)
+static void
+jit_tracer_invalidate_dependency(PyThreadState *tstate, void *obj)
 {
     _PyBloomFilter obj_filter;
     _Py_BloomFilter_Init(&obj_filter);
@@ -2029,4 +2041,4 @@ _PyExecutor_Free(struct _PyExecutorObject *self)
     Py_UNREACHABLE();
 }
 
-#endif /* _Py_TIER2 */
+#endif /* _Py_TIER2 */
\ No newline at end of file
diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c
index 81479a8f28e319..318a7813d680ad 100644
--- a/Python/optimizer_analysis.c
+++ b/Python/optimizer_analysis.c
@@ -114,7 +114,7 @@ convert_global_to_const(_PyUOpInstruction *inst, PyObject *obj, bool pop)
     if (res == NULL) {
         return NULL;
     }
-    if (_Py_IsImmortal(res)) {
+    if (_Py_IsImmortal(res) || _PyObject_HasDeferredRefcount(res)) {
         inst->opcode = pop ? _POP_TOP_LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE_BORROW;
     }
     else {
@@ -243,14 +243,14 @@ eliminate_pop_guard(_PyUOpInstruction *this_instr, bool exit)
 static JitOptRef
 lookup_attr(JitOptContext *ctx, _PyBloomFilter *dependencies,
             _PyUOpInstruction *this_instr,
-            PyTypeObject *type, PyObject *name, uint16_t immortal,
+            PyTypeObject *type, PyObject *name, uint16_t deferred_refcount,
             uint16_t mortal)
 {
     // The cached value may be dead, so we need to do the lookup again... :(
     if (type && PyType_Check(type)) {
         PyObject *lookup = _PyType_Lookup(type, name);
         if (lookup) {
-            int opcode = _Py_IsImmortal(lookup) ? immortal : mortal;
+            int opcode = _Py_IsImmortal(lookup) || _PyObject_HasDeferredRefcount(lookup) ? deferred_refcount : mortal;
             REPLACE_OP(this_instr, opcode, 0, (uintptr_t)lookup);
             PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type);
             _Py_BloomFilter_Add(dependencies, type);
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index bb663db195c089..4a30b67814d002 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -1364,7 +1364,7 @@ init_interp_main(PyThreadState *tstate)
         }
         else
 #endif
         {
-            interp->jit = true;
+            interp->jit = 1;
         }
     }
 }
@@ -1721,7 +1721,7 @@ finalize_modules(PyThreadState *tstate)
     PyInterpreterState *interp = tstate->interp;
 
     // Invalidate all executors and turn off JIT:
-    interp->jit = false;
+    FT_ATOMIC_STORE_UINT8(interp->jit, 0);
     interp->compiling = false;
 #ifdef _Py_TIER2
     _Py_Executors_InvalidateAll(interp, 0);
diff --git a/Python/pystate.c b/Python/pystate.c
index cf55297cf8d94e..c6c8f6db7a7aa0 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -566,7 +566,7 @@ init_interpreter(PyInterpreterState *interp,
         interp->monitoring_tool_versions[t] = 0;
     }
     interp->_code_object_generation = 0;
-    interp->jit = false;
+    interp->jit = 0;
     interp->compiling = false;
     interp->executor_list_head = NULL;
     interp->executor_deletion_list_head = NULL;
@@ -1545,6 +1545,12 @@ add_threadstate(PyInterpreterState *interp, PyThreadState *tstate,
 {
     assert(interp->threads.head != tstate);
     if (next != NULL) {
+#if defined(_Py_TIER2) && defined(Py_GIL_DISABLED)
+        FT_ATOMIC_STORE_UINT8(interp->jit, 0);
+        // There's more than one thread. In FT mode,
+        // disable the JIT completely for now.
+        _Py_Executors_InvalidateAll(interp, 1);
+#endif
         assert(next->prev == NULL || next->prev == tstate);
         next->prev = tstate;
     }
@@ -1846,6 +1852,13 @@ tstate_delete_common(PyThreadState *tstate, int release_gil)
         _PyObject_VirtualFree(_tstate->jit_tracer_state.code_buffer, UOP_BUFFER_SIZE);
         _tstate->jit_tracer_state.code_buffer = NULL;
     }
+#ifdef Py_GIL_DISABLED
+    // If only one thread remains, re-enable the JIT.
+    PyThreadState *curr = interp->threads.head;
+    if (curr != NULL && curr->prev == NULL && curr->next == NULL) {
+        FT_ATOMIC_STORE_UINT8(interp->jit, 1);
+    }
+#endif
 #endif
     HEAD_UNLOCK(runtime);
 
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index 94eb3164ecad58..ce54bc96221be9 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -2365,7 +2365,7 @@ sys_activate_stack_trampoline_impl(PyObject *module, const char *backend)
 {
 #ifdef PY_HAVE_PERF_TRAMPOLINE
 #ifdef _Py_JIT
-    if (_PyInterpreterState_GET()->jit) {
+    if (FT_ATOMIC_LOAD_UINT8(_PyInterpreterState_GET()->jit)) {
         PyErr_SetString(PyExc_ValueError, "Cannot activate the perf trampoline if the JIT is active");
         return NULL;
     }
diff --git a/Tools/jit/template.c b/Tools/jit/template.c
index 3537c74a820365..90a4668f610275 100644
--- a/Tools/jit/template.c
+++ b/Tools/jit/template.c
@@ -118,6 +118,16 @@ do { \
 #define ASSERT_WITHIN_STACK_BOUNDS(F, L) (void)0
 #endif
 
+// For now, the FT JIT only supports single-threaded code.
+#undef LOCK_OBJECT
+#undef UNLOCK_OBJECT
+#define LOCK_OBJECT(op) (1)
+#define UNLOCK_OBJECT(op) ((void)0)
+
+#ifdef Py_GIL_DISABLED
+#undef Py_GIL_DISABLED
+#endif
+
 __attribute__((preserve_none)) _Py_CODEUNIT *
 _JIT_ENTRY(
     _PyExecutorObject *executor, _PyInterpreterFrame *frame,
     _PyStackRef *stack_pointer, PyThreadState *tstate,
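The thread-count-driven toggling of `interp->jit` above (disabled in `add_threadstate`, re-enabled in `tstate_delete_common`) can be observed from Python with a small script. This is a rough sketch, not part of the patch: it assumes a free-threaded build configured with the JIT, and it assumes `sys._jit.is_enabled()` reflects the per-interpreter `jit` flag promptly; the `spin` helper is only there to give `JUMP_BACKWARD` something to warm up on.

# Sketch only; assumes a free-threaded JIT build and that sys._jit tracks interp->jit.
import sys
import threading
import time

def spin(n):
    total = 0
    for i in range(n):  # hot backward jump, candidate for JUMP_BACKWARD_JIT
        total += i
    return total

spin(100_000)
print("one thread, JIT enabled:", sys._jit.is_enabled())   # expected: True

worker = threading.Thread(target=time.sleep, args=(0.5,))
worker.start()
print("two threads, JIT enabled:", sys._jit.is_enabled())  # expected: False
worker.join()

spin(100_000)
# Expected: True again once the worker's thread state has been deleted.
print("back to one thread, JIT enabled:", sys._jit.is_enabled())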