Skip to content

Commit 67204b8

Browse files
committed
Replace heap-allocated void* TLS with a char[] buffer using
initial-exec __thread and placement new. - Add `initialized` field to TrackerThreadLocalState - Shared C header (tls_state_storage.h) for size/alignment constants between loader.c and C++ code, with static_assert - Clean up dead code: _key_once, _tl_state_key, delete_tl_state - Add standalone fork test validating TLS zero-init, fork inheritance, and fresh-thread behavior
1 parent 324eedb commit 67204b8

7 files changed

Lines changed: 216 additions & 31 deletions

File tree

include/lib/allocation_tracker.hpp

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
#include <cstddef>
1717
#include <functional>
1818
#include <mutex>
19-
#include <pthread.h>
2019

2120
namespace ddprof {
2221

@@ -118,8 +117,6 @@ class AllocationTracker {
118117

119118
static AllocationTracker *create_instance();
120119

121-
static void delete_tl_state(void *tl_state);
122-
123120
void track_allocation(uintptr_t addr, size_t size,
124121
TrackerThreadLocalState &tl_state, bool is_large_alloc);
125122
void track_deallocation(uintptr_t addr, TrackerThreadLocalState &tl_state,
@@ -156,11 +153,6 @@ class AllocationTracker {
156153
AddressBitset _allocated_address_set;
157154
IntervalTimerCheck _interval_timer_check;
158155

159-
// These can not be tied to the internal state of the instance.
160-
// The creation of the instance depends on this
161-
static pthread_once_t _key_once; // ensures we call key creation a single time
162-
static pthread_key_t _tl_state_key;
163-
164156
static AllocationTracker *_instance;
165157
};
166158

include/lib/allocation_tracker_tls.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@ struct TrackerThreadLocalState {
2525
// should not allocate because we might already
2626
// be inside an allocation)
2727

28+
// Set to true by placement new in init_tl_state().
29+
// Zero-initialized (false) in a fresh thread's TLS before init.
30+
bool initialized{true};
31+
2832
// In the choice of random generators, this one is smaller
2933
// - smaller than mt19937 (8 vs 5K)
3034
std::minstd_rand gen{std::random_device{}()};

include/lib/tls_state_storage.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Unless explicitly stated otherwise all files in this repository are licensed
2+
// under the Apache License Version 2.0. This product includes software
3+
// developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present
4+
// Datadog, Inc.
5+
6+
#pragma once
7+
8+
// C-compatible constants for the TLS buffer that stores
9+
// TrackerThreadLocalState. Used by loader.c (C) and allocation_tracker.cc
10+
// (C++). Correctness enforced at compile time via static_assert in
11+
// allocation_tracker.cc.
12+
#define DDPROF_TLS_STATE_SIZE 48
13+
#define DDPROF_TLS_STATE_ALIGN 8

src/lib/allocation_tracker.cc

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "ringbuffer_utils.hpp"
1616
#include "savecontext.hpp"
1717
#include "syscalls.hpp"
18+
#include "tls_state_storage.h"
1819
#include "tsc_clock.hpp"
1920

2021
#include <algorithm>
@@ -23,24 +24,27 @@
2324
#include <chrono>
2425
#include <cstdint>
2526
#include <cstdlib>
27+
#include <new>
2628
#include <unistd.h>
2729

2830
namespace ddprof {
2931

30-
// Static declarations
31-
pthread_once_t AllocationTracker::_key_once = PTHREAD_ONCE_INIT;
32-
33-
pthread_key_t AllocationTracker::_tl_state_key;
34-
3532
AllocationTracker *AllocationTracker::_instance;
3633

34+
static_assert(sizeof(TrackerThreadLocalState) == DDPROF_TLS_STATE_SIZE,
35+
"Update DDPROF_TLS_STATE_SIZE in tls_state_storage.h");
36+
static_assert(alignof(TrackerThreadLocalState) <= DDPROF_TLS_STATE_ALIGN,
37+
"Update DDPROF_TLS_STATE_ALIGN in tls_state_storage.h");
38+
3739
namespace {
3840

3941
#ifdef DDPROF_USE_LOADER
40-
extern "C"
41-
__attribute((tls_model("initial-exec"))) __thread void *ddprof_lib_state;
42+
extern "C" __attribute((tls_model(
43+
"initial-exec"))) __thread char ddprof_lib_state[DDPROF_TLS_STATE_SIZE];
4244
#else
43-
__attribute((tls_model("initial-exec"))) __thread void *ddprof_lib_state;
45+
__attribute((tls_model("initial-exec")))
46+
__attribute((aligned(DDPROF_TLS_STATE_ALIGN))) __thread char
47+
ddprof_lib_state[sizeof(TrackerThreadLocalState)];
4448
#endif
4549

4650
DDPROF_NOINLINE auto sleep_and_retry_reserve(MPSCRingBufferWriter &writer,
@@ -61,19 +65,19 @@ DDPROF_NOINLINE auto sleep_and_retry_reserve(MPSCRingBufferWriter &writer,
6165
} // namespace
6266

6367
TrackerThreadLocalState *AllocationTracker::get_tl_state() {
64-
return static_cast<TrackerThreadLocalState *>(ddprof_lib_state);
68+
// ddprof_lib_state is zero-initialized by libc for each new thread.
69+
// After placement new (init_tl_state), initialized is set to true.
70+
auto *state = reinterpret_cast<TrackerThreadLocalState *>(ddprof_lib_state);
71+
return state->initialized ? state : nullptr;
6572
}
6673

6774
TrackerThreadLocalState *AllocationTracker::init_tl_state() {
68-
// Since init_tl_state is only called in allocation_tracking_init and
69-
// notify_thread_start, there is no danger of reentering it when doing an
70-
// allocation.
71-
auto tl_state = std::make_unique<TrackerThreadLocalState>();
72-
tl_state->tid = ddprof::gettid();
73-
tl_state->stack_bounds = retrieve_stack_bounds();
74-
ddprof_lib_state = tl_state.get();
75-
76-
return tl_state.release();
75+
// Placement new into TLS -- no heap allocation, no cleanup needed on thread
76+
// exit. Safe to call after fork (TLS memory is inherited by child).
77+
auto *state = new (ddprof_lib_state) TrackerThreadLocalState{};
78+
state->tid = ddprof::gettid();
79+
state->stack_bounds = retrieve_stack_bounds();
80+
return state;
7781
}
7882

7983
AllocationTracker::AllocationTracker() = default;
@@ -83,10 +87,6 @@ AllocationTracker *AllocationTracker::create_instance() {
8387
return &tracker;
8488
}
8589

86-
void AllocationTracker::delete_tl_state(void *tl_state) {
87-
delete static_cast<TrackerThreadLocalState *>(tl_state);
88-
}
89-
9090
DDRes AllocationTracker::allocation_tracking_init(
9191
uint64_t allocation_profiling_rate, uint32_t flags,
9292
uint32_t stack_sample_size, const RingBufferInfo &ring_buffer,

src/lib/loader.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include "constants.hpp"
77
#include "dd_profiling.h"
88
#include "lib_embedded_data.h"
9+
#include "tls_state_storage.h"
910

1011
#include <dlfcn.h>
1112
#include <fcntl.h>
@@ -19,7 +20,9 @@
1920
#include <unistd.h>
2021

2122
__attribute__((__visibility__("default")))
22-
__attribute__((tls_model("initial-exec"))) __thread void *ddprof_lib_state;
23+
__attribute__((tls_model("initial-exec")))
24+
__attribute__((aligned(DDPROF_TLS_STATE_ALIGN))) __thread char
25+
ddprof_lib_state[DDPROF_TLS_STATE_SIZE];
2326

2427
/* Role of loader is to ensure that all dependencies (libdl/lim/libpthread) of
2528
* libdd_profiling-embedded.so are satisfied before dlopen'ing it.

test/CMakeLists.txt

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,37 @@ add_unit_test(address_bitset-ut address_bitset-ut.cc ../src/lib/address_bitset.c
393393

394394
add_unit_test(lib_logger-ut ./lib_logger-ut.cc)
395395

396+
# Standalone fork test for allocation tracker TLS (no gtest -- fork inside gtest is fragile)
397+
add_exe(
398+
allocation_tracker_fork_test
399+
allocation_tracker_fork_test.cc
400+
../src/lib/allocation_tracker.cc
401+
../src/lib/address_bitset.cc
402+
../src/logger.cc
403+
../src/lib/pthread_fixes.cc
404+
../src/lib/savecontext.cc
405+
../src/lib/saveregisters.cc
406+
../src/procutils.cc
407+
../src/ratelimiter.cc
408+
../src/ringbuffer_utils.cc
409+
../src/sys_utils.cc
410+
../src/tsc_clock.cc
411+
../src/perf_clock.cc
412+
../src/perf.cc
413+
../src/ddres_list.cc
414+
../src/perf_ringbuffer.cc
415+
../src/pevent_lib.cc
416+
../src/user_override.cc
417+
LIBRARIES llvm-demangle)
418+
target_include_directories(
419+
allocation_tracker_fork_test PRIVATE ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/include/lib
420+
${CMAKE_SOURCE_DIR}/src)
421+
422+
add_test(
423+
NAME allocation_tracker_fork_test
424+
COMMAND allocation_tracker_fork_test
425+
WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
426+
396427
add_unit_test(
397428
create_elf-ut
398429
create_elf-ut.cc
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
// Unless explicitly stated otherwise all files in this repository are licensed
2+
// under the Apache License Version 2.0. This product includes software
3+
// developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present
4+
// Datadog, Inc.
5+
6+
// Standalone test (no gtest) to verify TLS availability across fork().
7+
// Gtest is avoided because forking inside gtest is fragile.
8+
9+
#include "lib/allocation_tracker.hpp"
10+
#include "loghandle.hpp"
11+
12+
#include <pthread.h>
13+
#include <sys/wait.h>
14+
#include <unistd.h>
15+
16+
using ddprof::AllocationTracker;
17+
18+
#define CHECK_OR_RETURN(cond, ...) \
19+
do { \
20+
if (!(cond)) { \
21+
LG_ERR(__VA_ARGS__); \
22+
return 1; \
23+
} \
24+
} while (0)
25+
26+
#define CHECK_OR_EXIT(cond, ...) \
27+
do { \
28+
if (!(cond)) { \
29+
LG_ERR(__VA_ARGS__); \
30+
_exit(1); \
31+
} \
32+
} while (0)
33+
34+
void *thread_check_null_tls(void *arg) {
35+
auto *result = static_cast<int *>(arg);
36+
auto *state = AllocationTracker::get_tl_state();
37+
if (state != nullptr) {
38+
LG_ERR("new thread expected NULL TLS, got %p", static_cast<void *>(state));
39+
*result = 1;
40+
return nullptr;
41+
}
42+
*result = 0;
43+
return nullptr;
44+
}
45+
46+
int run_child(void *parent_state) {
47+
// After fork, the __thread buffer is inherited: the child's main thread
48+
// sees the initialized state at the same virtual address.
49+
auto *child_inherited = AllocationTracker::get_tl_state();
50+
CHECK_OR_EXIT(child_inherited == parent_state,
51+
"expected inherited TLS %p, got %p", parent_state,
52+
static_cast<void *>(child_inherited));
53+
54+
// Re-init to get a fresh state for the child
55+
auto *child_state = AllocationTracker::init_tl_state();
56+
CHECK_OR_EXIT(child_state != nullptr, "init_tl_state() returned NULL");
57+
58+
auto *retrieved = AllocationTracker::get_tl_state();
59+
CHECK_OR_EXIT(retrieved == child_state, "get/init mismatch: %p vs %p",
60+
static_cast<void *>(retrieved),
61+
static_cast<void *>(child_state));
62+
63+
// A new thread in the child must start with NULL TLS (zero-initialized)
64+
pthread_t thread;
65+
int thread_result = -1;
66+
CHECK_OR_EXIT(pthread_create(&thread, nullptr, thread_check_null_tls,
67+
&thread_result) == 0,
68+
"pthread_create failed");
69+
pthread_join(thread, nullptr);
70+
CHECK_OR_EXIT(thread_result == 0, "child thread TLS was not NULL");
71+
72+
_exit(0);
73+
}
74+
75+
int main() {
76+
ddprof::LogHandle log_handle(LL_NOTICE);
77+
LG_NTC("allocation_tracker_fork_test starting");
78+
79+
// Before any init, main thread's TLS must be zero-initialized by libc,
80+
// so get_tl_state() should return NULL (initialized == false).
81+
auto *pre_init = AllocationTracker::get_tl_state();
82+
CHECK_OR_RETURN(pre_init == nullptr,
83+
"main thread TLS not zero-initialized before init (got %p)",
84+
static_cast<void *>(pre_init));
85+
86+
// Verify the same zero-initialization contract on a new thread
87+
{
88+
pthread_t thread;
89+
int thread_result = -1;
90+
CHECK_OR_RETURN(pthread_create(&thread, nullptr, thread_check_null_tls,
91+
&thread_result) == 0,
92+
"pthread_create failed (pre-fork thread)");
93+
pthread_join(thread, nullptr);
94+
CHECK_OR_RETURN(thread_result == 0,
95+
"pre-fork thread TLS was not zero-initialized");
96+
}
97+
98+
// Create TLS in parent
99+
auto *parent_state = AllocationTracker::init_tl_state();
100+
CHECK_OR_RETURN(parent_state != nullptr,
101+
"parent init_tl_state() returned NULL");
102+
103+
auto *retrieved = AllocationTracker::get_tl_state();
104+
CHECK_OR_RETURN(retrieved == parent_state, "parent get/init mismatch");
105+
106+
fflush(stdout);
107+
fflush(stderr);
108+
109+
pid_t pid = fork();
110+
CHECK_OR_RETURN(pid != -1, "fork failed: %s", strerror(errno));
111+
112+
if (pid == 0) {
113+
run_child(parent_state);
114+
_exit(0);
115+
}
116+
117+
// Parent: wait for child
118+
int status;
119+
waitpid(pid, &status, 0);
120+
121+
if (!WIFEXITED(status)) {
122+
if (WIFSIGNALED(status)) {
123+
LG_ERR("child killed by signal %d", WTERMSIG(status));
124+
} else {
125+
LG_ERR("child did not exit normally");
126+
}
127+
return 1;
128+
}
129+
130+
int exit_code = WEXITSTATUS(status);
131+
CHECK_OR_RETURN(exit_code == 0, "child exited with code %d", exit_code);
132+
133+
// Parent TLS should be unaffected by the fork
134+
auto *parent_after = AllocationTracker::get_tl_state();
135+
CHECK_OR_RETURN(parent_after == parent_state,
136+
"parent TLS corrupted after fork (%p vs %p)",
137+
static_cast<void *>(parent_after),
138+
static_cast<void *>(parent_state));
139+
140+
LG_NTC("allocation_tracker_fork_test passed");
141+
return 0;
142+
}

0 commit comments

Comments
 (0)