diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index c6c82038d7c85f..f6bdba3e9916c0 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -103,7 +103,6 @@ extern int _PyPerfTrampoline_SetCallbacks(_PyPerf_Callbacks *); extern void _PyPerfTrampoline_GetCallbacks(_PyPerf_Callbacks *); extern int _PyPerfTrampoline_Init(int activate); extern int _PyPerfTrampoline_Fini(void); -extern void _PyPerfTrampoline_FreeArenas(void); extern int _PyIsPerfTrampolineActive(void); extern PyStatus _PyPerfTrampoline_AfterFork_Child(void); #ifdef PY_HAVE_PERF_TRAMPOLINE diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index 818c4f159591fe..3fe1fdaa1589b6 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -87,7 +87,9 @@ struct _ceval_runtime_state { struct trampoline_api_st trampoline_api; FILE *map_file; Py_ssize_t persist_after_fork; - _PyFrameEvalFunction prev_eval_frame; + _PyFrameEvalFunction prev_eval_frame; + Py_ssize_t trampoline_refcount; + int code_watcher_id; #else int _not_used; #endif diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-12-27-23-57-43.gh-issue-143228.m3EF9E.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-27-23-57-43.gh-issue-143228.m3EF9E.rst new file mode 100644 index 00000000000000..893bc29543d91d --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-27-23-57-43.gh-issue-143228.m3EF9E.rst @@ -0,0 +1,4 @@ +Fix use-after-free in perf trampoline when toggling profiling while +threads are running or during interpreter finalization with daemon threads +active. The fix uses reference counting to ensure trampolines are not freed +while any code object could still reference them. 
Patch by Pablo Galindo diff --git a/Python/perf_trampoline.c b/Python/perf_trampoline.c index 335d8ac7dadd10..c0dc1f7a49bdca 100644 --- a/Python/perf_trampoline.c +++ b/Python/perf_trampoline.c @@ -204,6 +204,42 @@ enum perf_trampoline_type { #define persist_after_fork _PyRuntime.ceval.perf.persist_after_fork #define perf_trampoline_type _PyRuntime.ceval.perf.perf_trampoline_type #define prev_eval_frame _PyRuntime.ceval.perf.prev_eval_frame +#define trampoline_refcount _PyRuntime.ceval.perf.trampoline_refcount +#define code_watcher_id _PyRuntime.ceval.perf.code_watcher_id + +static void free_code_arenas(void); + +static void +perf_trampoline_reset_state(void) +{ + free_code_arenas(); + if (code_watcher_id >= 0) { + PyCode_ClearWatcher(code_watcher_id); + code_watcher_id = -1; + } + extra_code_index = -1; +} + +static int +perf_trampoline_code_watcher(PyCodeEvent event, PyCodeObject *co) +{ + if (event != PY_CODE_EVENT_DESTROY) { + return 0; + } + if (extra_code_index == -1) { + return 0; + } + py_trampoline f = NULL; + int ret = _PyCode_GetExtra((PyObject *)co, extra_code_index, (void **)&f); + if (ret != 0 || f == NULL) { + return 0; + } + trampoline_refcount--; + if (trampoline_refcount == 0) { + perf_trampoline_reset_state(); + } + return 0; +} static void perf_map_write_entry(void *state, const void *code_addr, @@ -407,6 +443,7 @@ py_trampoline_evaluator(PyThreadState *ts, _PyInterpreterFrame *frame, perf_code_arena->code_size, co); _PyCode_SetExtra((PyObject *)co, extra_code_index, (void *)new_trampoline); + trampoline_refcount++; f = new_trampoline; } assert(f != NULL); @@ -433,6 +470,7 @@ int PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *co) } trampoline_api.write_state(trampoline_api.state, new_trampoline, perf_code_arena->code_size, co); + trampoline_refcount++; return _PyCode_SetExtra((PyObject *)co, extra_code_index, (void *)new_trampoline); } @@ -487,6 +525,10 @@ _PyPerfTrampoline_Init(int activate) { #ifdef PY_HAVE_PERF_TRAMPOLINE PyThreadState 
*tstate = _PyThreadState_GET(); + if (code_watcher_id == 0) { + // Initialize to -1 since 0 is a valid watcher ID + code_watcher_id = -1; + } if (!activate) { _PyInterpreterState_SetEvalFrameFunc(tstate->interp, prev_eval_frame); perf_status = PERF_STATUS_NO_INIT; @@ -504,6 +546,13 @@ _PyPerfTrampoline_Init(int activate) if (new_code_arena() < 0) { return -1; } + code_watcher_id = PyCode_AddWatcher(perf_trampoline_code_watcher); + if (code_watcher_id < 0) { + PyErr_FormatUnraisable("Failed to register code watcher for perf trampoline"); + free_code_arenas(); + return -1; + } + trampoline_refcount = 1; // Base refcount held by the system perf_status = PERF_STATUS_OK; } #endif @@ -525,17 +574,19 @@ _PyPerfTrampoline_Fini(void) trampoline_api.free_state(trampoline_api.state); perf_trampoline_type = PERF_TRAMPOLINE_UNSET; } - extra_code_index = -1; + + // Prevent new trampolines from being created perf_status = PERF_STATUS_NO_INIT; -#endif - return 0; -} -void _PyPerfTrampoline_FreeArenas(void) { -#ifdef PY_HAVE_PERF_TRAMPOLINE - free_code_arenas(); + // Decrement base refcount. If refcount reaches 0, all code objects are already + // dead so clean up now. Otherwise, watcher remains active to clean up when last + // code object dies; extra_code_index stays valid so watcher can identify them. + trampoline_refcount--; + if (trampoline_refcount == 0) { + perf_trampoline_reset_state(); + } #endif - return; + return 0; } int diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 45b585faf9c980..bb663db195c089 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1944,7 +1944,6 @@ finalize_interp_clear(PyThreadState *tstate) _PyArg_Fini(); _Py_ClearFileSystemEncoding(); _PyPerfTrampoline_Fini(); - _PyPerfTrampoline_FreeArenas(); } finalize_interp_types(tstate->interp);