2828
2929namespace ddprof {
3030
31- namespace {
32- // TLS key initialization states
33- constexpr int kKeyNotInitialized = 0 ;
34- constexpr int kKeyInitializing = 1 ;
35- constexpr int kKeyInitialized = 2 ;
36- } // namespace
37-
38- // Static declarations
39- std::atomic<int > AllocationTracker::_key_init_state{kKeyNotInitialized };
40-
41- pthread_key_t AllocationTracker::_tl_state_key;
42-
43- AllocationTracker *AllocationTracker::_instance;
31+ AllocationTracker *AllocationTracker::_instance = nullptr ;
32+ std::atomic<pthread_key_t > AllocationTracker::_tl_state_key{
33+ AllocationTracker::kInvalidKey };
4434
4535namespace {
4636DDPROF_NOINLINE auto sleep_and_retry_reserve (MPSCRingBufferWriter &writer,
@@ -65,36 +55,15 @@ TrackerThreadLocalState *AllocationTracker::get_tl_state() {
6555 // tls_get_addr can call into malloc, which can create a recursive loop
6656 // instead we call pthread APIs to control the creation of TLS objects
6757
68- // Thread-safe initialization using atomics (avoids pthread_once ABI issues)
69-
70- // Fast path: relaxed load is sufficient since we only care if it's
71- // initialized. Once initialized, this value never changes, so no
72- // synchronization is needed.
73- if (_key_init_state.load (std::memory_order_relaxed) != kKeyInitialized ) {
74- // Slow path: need proper synchronization for initialization
75- int expected = kKeyNotInitialized ;
76- if (_key_init_state.compare_exchange_strong (expected, kKeyInitializing ,
77- std::memory_order_acq_rel)) {
78- // We won the race, do the initialization
79- make_key ();
80- _key_init_state.store (kKeyInitialized , std::memory_order_release);
81- } else {
82- // Another thread is initializing or already done, wait until complete
83- constexpr int k_max_init_wait_attempts = 100 ;
84- int attempts = 0 ;
85- while (_key_init_state.load (std::memory_order_acquire) !=
86- kKeyInitialized ) {
87- if (++attempts >= k_max_init_wait_attempts) {
88- return nullptr ;
89- }
90- std::this_thread::yield ();
91- std::this_thread::sleep_for (std::chrono::microseconds (1 ));
92- }
93- }
94- }
58+ // The pthread key is initialized during allocation_tracking_init() and
59+ // maintained by the fork handler, so we can directly use it here (hot path)
60+ pthread_key_t key = _tl_state_key.load (std::memory_order_relaxed);
61+
62+ // In debug builds, verify our assumption
63+ assert (key != kInvalidKey && " pthread key should be initialized before use" );
9564
96- auto *tl_state = static_cast <TrackerThreadLocalState *>(
97- pthread_getspecific (_tl_state_key ));
65+ auto *tl_state =
66+ static_cast <TrackerThreadLocalState *>( pthread_getspecific (key ));
9867 return tl_state;
9968}
10069
@@ -127,15 +96,49 @@ void AllocationTracker::delete_tl_state(void *tl_state) {
12796 delete static_cast <TrackerThreadLocalState *>(tl_state);
12897}
12998
130- void AllocationTracker::make_key () {
131- // delete is called on all key objects
132- pthread_key_create (&_tl_state_key, delete_tl_state);
99+ void AllocationTracker::ensure_key_initialized () {
100+ // Ensure pthread key is initialized (idempotent)
101+ pthread_key_t key = _tl_state_key.load (std::memory_order_acquire);
102+
103+ if (key == kInvalidKey ) {
104+ pthread_key_t new_key;
105+ if (pthread_key_create (&new_key, delete_tl_state) != 0 ) {
106+ return ; // Failed, will be retried later
107+ }
108+
109+ pthread_key_t expected = kInvalidKey ;
110+ if (!_tl_state_key.compare_exchange_strong (expected, new_key,
111+ std::memory_order_release)) {
112+ // Another thread beat us, clean up our key
113+ pthread_key_delete (new_key);
114+ }
115+ }
116+ }
117+
118+ static void child_after_fork_handler () {
119+ // After fork in the child, make sure the pthread key has been created.
120+ // The key itself survives fork, so this only creates one if it is still unset.
121+ AllocationTracker::ensure_key_initialized ();
122+ }
123+
124+ void AllocationTracker::register_fork_handler () {
125+ static bool registered = false ;
126+ if (!registered) {
127+ pthread_atfork (nullptr , nullptr , child_after_fork_handler);
128+ registered = true ;
129+ }
133130}
134131
135132DDRes AllocationTracker::allocation_tracking_init (
136133 uint64_t allocation_profiling_rate, uint32_t flags,
137134 uint32_t stack_sample_size, const RingBufferInfo &ring_buffer,
138135 const IntervalTimerCheck &timer_check) {
136+ // Register fork handler to ensure key is valid after fork
137+ register_fork_handler ();
138+
139+ // Ensure pthread key is initialized before we try to use it
140+ ensure_key_initialized ();
141+
139142 TrackerThreadLocalState *tl_state = get_tl_state ();
140143 if (!tl_state) {
141144 // This is the time at which the init_tl_state should not fail
0 commit comments