diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-03-05-19-10-56.gh-issue-145566.H4RupyYN.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-05-19-10-56.gh-issue-145566.H4RupyYN.rst new file mode 100644 index 00000000000000..723b81ddc5f897 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-05-19-10-56.gh-issue-145566.H4RupyYN.rst @@ -0,0 +1,2 @@ +In the free threading build, skip the stop-the-world pause when reassigning +``__class__`` on a newly created object. diff --git a/Objects/mimalloc/alloc.c b/Objects/mimalloc/alloc.c index 44c84cf1931717..fec1314244c24e 100644 --- a/Objects/mimalloc/alloc.c +++ b/Objects/mimalloc/alloc.c @@ -625,6 +625,10 @@ bool _mi_free_delayed_block(mi_block_t* block) { } // collect all other non-local frees to ensure up-to-date `used` count + if (page->qsbr_node.next != NULL && (page->local_free != NULL || mi_page_thread_free(page) != NULL)) { + static _Atomic(int) _c; int _n = 1+atomic_fetch_add(&_c,1); + if (_n%100==0||_n<=3) printf("QSBR CLEAR from _mi_free_delayed_block page=%p all_free=%d used=%d (%d)\n",(void*)page,(int)mi_page_all_free(page),(int)page->used,_n); + } _mi_page_free_collect(page, false); // and free the block (possibly freeing the page as well since used is updated) diff --git a/Objects/mimalloc/page.c b/Objects/mimalloc/page.c index ff7444cce10923..6731f690cd2d3f 100644 --- a/Objects/mimalloc/page.c +++ b/Objects/mimalloc/page.c @@ -226,6 +226,13 @@ void _mi_page_free_collect(mi_page_t* page, bool force) { // and the local free list if (page->local_free != NULL) { // any previous QSBR goals are no longer valid because we reused the page + if (page->qsbr_node.next != NULL) { + extern _Atomic(int) _debug_qsbr_clear_in_collect; + int n = 1 + atomic_fetch_add(&_debug_qsbr_clear_in_collect, 1); + if (n%100==0||n<=3) printf("QSBR CLEAR generic page=%p all_free=%d used=%d xfree=%d lfree=%d (%d)\n", + (void*)page,(int)mi_page_all_free(page),(int)page->used, + (mi_page_thread_free(page)!=NULL),(page->local_free!=NULL),n); + } _PyMem_mi_page_clear_qsbr(page); if mi_likely(page->free == NULL) { @@ -371,6 +378,10 @@ static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) { if (mi_page_is_in_full(page)) return; mi_page_queue_enqueue_from(&mi_page_heap(page)->pages[MI_BIN_FULL], pq, page); + if (page->qsbr_node.next != NULL && (page->local_free != NULL || mi_page_thread_free(page) != NULL)) { + static _Atomic(int) _c; int _n = 1+atomic_fetch_add(&_c,1); + if (_n%100==0||_n<=3) printf("QSBR CLEAR from mi_page_to_full page=%p all_free=%d used=%d (%d)\n",(void*)page,(int)mi_page_all_free(page),(int)page->used,_n); + } _mi_page_free_collect(page,false); // try to collect right away in case another thread freed just before MI_USE_DELAYED_FREE was set } @@ -752,6 +763,10 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p #endif // 0. collect freed blocks by us and other threads + if (page->qsbr_node.next != NULL && (page->local_free != NULL || mi_page_thread_free(page) != NULL)) { + static _Atomic(int) _c; int _n = 1+atomic_fetch_add(&_c,1); + if (_n%100==0||_n<=3) printf("QSBR CLEAR from find_free_ex page=%p all_free=%d used=%d (%d)\n",(void*)page,(int)mi_page_all_free(page),(int)page->used,_n); + } _mi_page_free_collect(page, false); // 1. if the page contains free blocks, we are done @@ -777,6 +792,15 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p mi_heap_stat_counter_increase(heap, searches, count); if (page == NULL) { + { + static _Atomic(int) null_page_count; + int n = 1 + atomic_fetch_add(&null_page_count, 1); + if (n % 50 == 0 || n <= 5) { + printf("find_free_ex: page==NULL tid=%zu heap_tid=%zu use_qsbr=%d (call #%d)\n", + (size_t)_mi_thread_id(), (size_t)heap->thread_id, + heap->page_use_qsbr, n); + } + } _PyMem_mi_heap_collect_qsbr(heap); // some pages might be safe to free now _mi_heap_collect_retired(heap, false); // perhaps make a page available? page = mi_page_fresh(heap, pq); @@ -809,6 +833,10 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { else #endif { + if (page->qsbr_node.next != NULL && (page->local_free != NULL || mi_page_thread_free(page) != NULL)) { + static _Atomic(int) _c; int _n = 1+atomic_fetch_add(&_c,1); + if (_n%100==0||_n<=3) printf("QSBR CLEAR from mi_page_fresh_alloc page=%p all_free=%d used=%d (%d)\n",(void*)page,(int)mi_page_all_free(page),(int)page->used,_n); + } _mi_page_free_collect(page,false); } diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 092be84d2b9954..d3772669a609c6 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -151,6 +151,8 @@ should_advance_qsbr_for_page(struct _qsbr_thread_state *qsbr, mi_page_t *page) } #endif +_Atomic(int) _debug_qsbr_clear_in_collect; + static bool _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force) { @@ -174,7 +176,19 @@ _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force) page->qsbr_goal = _Py_qsbr_shared_next(tstate->qsbr->shared); } - llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node); + mi_heap_t *page_heap = mi_page_heap(page); + _PyThreadStateImpl *heap_tstate = _Py_CONTAINER_OF(page_heap->tld, _PyThreadStateImpl, mimalloc.tld); + if (page_heap->thread_id != _mi_thread_id()) { + static _Atomic(int) cross_thread_qsbr_count; + int n = 1 + atomic_fetch_add(&cross_thread_qsbr_count, 1); + if (n % 100 == 0) { + _PyThreadStateImpl *cur_tstate = (_PyThreadStateImpl *)PyThreadState_GET(); + printf("cross-thread QSBR page count: %d (page_tid=%zu cur_tid=%zu heap_tstate=%p cur_tstate=%p)\n", + n, (size_t)page_heap->thread_id, (size_t)_mi_thread_id(), + (void*)heap_tstate, (void*)cur_tstate); + } + } + llist_insert_tail(&heap_tstate->mimalloc.page_list, &page->qsbr_node); return false; } #endif @@ -212,25 +226,61 @@ _PyMem_mi_heap_collect_qsbr(mi_heap_t *heap) _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET(); struct llist_node *head = &tstate->mimalloc.page_list; if (llist_empty(head)) { + static _Atomic(int) empty_qsbr_count; + int n = 1 + atomic_fetch_add(&empty_qsbr_count, 1); + if (n % 50 == 0 || n <= 5) { + _PyThreadStateImpl *heap_ts = _Py_CONTAINER_OF(heap->tld, _PyThreadStateImpl, mimalloc.tld); + printf("qsbr_collect EMPTY tid=%zu heap_tid=%zu tstate=%p heap_tstate=%p (call #%d)\n", + (size_t)_mi_thread_id(), (size_t)heap->thread_id, + (void*)tstate, (void*)heap_ts, n); + } return; } + int freed = 0, not_free = 0, not_reached = 0; struct llist_node *node; llist_for_each_safe(node, head) { mi_page_t *page = llist_data(node, mi_page_t, qsbr_node); if (!mi_page_all_free(page)) { // We allocated from this page some point after the delayed free + not_free++; _PyMem_mi_page_clear_qsbr(page); continue; } if (!_Py_qsbr_poll(tstate->qsbr, page->qsbr_goal)) { - return; + not_reached++; + // On first failure, log the details + if (not_reached == 1) { + struct _qsbr_shared *shared = tstate->qsbr->shared; + printf(" qsbr FAIL: goal=%llu rd_seq=%llu wr_seq=%llu my_seq=%llu\n", + (unsigned long long)page->qsbr_goal, + (unsigned long long)_Py_atomic_load_uint64(&shared->rd_seq), + (unsigned long long)_Py_atomic_load_uint64(&shared->wr_seq), + (unsigned long long)_Py_atomic_load_uint64(&tstate->qsbr->seq)); + // scan threads to find the blocker + struct _qsbr_pad *array = shared->array; + for (Py_ssize_t ii = 0; ii < shared->size; ii++) { + uint64_t s = _Py_atomic_load_uint64(&array[ii].qsbr.seq); + if (s != QSBR_OFFLINE && s < page->qsbr_goal) { + printf(" blocker slot %zd: seq=%llu\n", ii, (unsigned long long)s); + } + } + } + // count remaining + while (node->next != head) { not_reached++; node = node->next; } + break; } + freed++; _PyMem_mi_page_clear_qsbr(page); _mi_page_free(page, mi_page_queue_of(page), false); } + if (freed || not_free || not_reached) { + printf("qsbr_collect tid=%zu: freed=%d not_free=%d not_reached=%d heap_tid=%zu\n", + (size_t)_mi_thread_id(), freed, not_free, not_reached, + (size_t)heap->thread_id); + } #endif } diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 03d5cfa4ca5249..ab21770734dffe 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -7120,7 +7120,11 @@ object_set_class_world_stopped(PyObject *self, PyTypeObject *newto) assert(_PyObject_GetManagedDict(self) == dict); - if (_PyDict_DetachFromObject(dict, self) < 0) { + int err; + Py_BEGIN_CRITICAL_SECTION(dict); + err = _PyDict_DetachFromObject(dict, self); + Py_END_CRITICAL_SECTION(); + if (err < 0) { return -1; } @@ -7161,13 +7165,18 @@ object_set_class(PyObject *self, PyObject *value, void *closure) } #ifdef Py_GIL_DISABLED + int unique = _PyObject_IsUniquelyReferenced(self); PyInterpreterState *interp = _PyInterpreterState_GET(); - _PyEval_StopTheWorld(interp); + if (!unique) { + _PyEval_StopTheWorld(interp); + } #endif PyTypeObject *oldto = Py_TYPE(self); int res = object_set_class_world_stopped(self, newto); #ifdef Py_GIL_DISABLED - _PyEval_StartTheWorld(interp); + if (!unique) { + _PyEval_StartTheWorld(interp); + } #endif if (res == 0) { if (oldto->tp_flags & Py_TPFLAGS_HEAPTYPE) {