1111#include < algorithm>
1212#include < cstddef>
1313#include < cstdint>
14+ #include < cstdio>
1415#include < mutex>
1516#include < string>
1617#include < utility>
17- #include < utility>
1818#include < vector>
1919
2020
@@ -71,13 +71,137 @@ namespace
7171 CoverageFlushCallback g_flush_callback;
7272}
7373
74- // / The __cyg_profile_func_enter / __cyg_profile_func_exit hooks are provided so
75- // / the binary can be built with -finstrument-functions-after-inlining without
76- // / link errors. The shadow-stack depth approach was abandoned because
77- // / LLVMProfileData::FunctionPointer is always NULL in PIE builds — there is no
78- // / runtime text-address → profile-index mapping available.
79- // / min_depth in CovCounter is instead computed from the entry counter call count
80- // / in getCurrentCoveredNameRefs (see below).
74+ // / ── XRay call-depth tracking ─────────────────────────────────────────────────
75+ // /
76+ // / XRay injects nop sleds at every function entry/exit that can be patched at
77+ // / runtime to call a user-supplied handler. Unlike -finstrument-functions, XRay
78+ // / provides real runtime text addresses for each function (via
79+ // / __xray_function_address), allowing us to build the text_address →
80+ // / profile_data_index mapping that LLVMProfileData::FunctionPointer (always NULL
81+ // / in PIE builds) cannot provide.
82+ // /
83+ // / Flow:
84+ // / setCoverageTest('name') → __xray_patch() + register handler
85+ // / ... test runs, handler records min call depth per function_id ...
86+ // / setCoverageTest('') → __xray_unpatch(), flush depths
87+ // /
88+ // / After depth collection, __xray_function_address(id) + dladdr() resolves each
89+ // / function_id to a demangled symbol name, whose FNV64 hash is matched against
90+ // / LLVMProfileData::NameRef to write g_func_min_depth[profile_index].
91+
92+ #ifdef CLICKHOUSE_XRAY_INSTRUMENT_COVERAGE
93+
94+ #include < xray/xray_interface.h>
95+ #include < dlfcn.h>
96+ #include < atomic>
97+
98+ namespace
99+ {
100+
101+ // / FNV-64 hash matching LLVM's IndexedInstrProf::ComputeHash / __llvm_profile_str2hash.
102+ static uint64_t fnv64 (const char * s) __attribute__((xray_never_instrument));
103+ static uint64_t fnv64 (const char * s)
104+ {
105+ uint64_t h = 0xcbf29ce484222325ULL ;
106+ for (; *s; ++s) { h ^= static_cast <uint8_t >(*s); h *= 0x100000001b3ULL ; }
107+ return h;
108+ }
109+
110+ // / Map from XRay function_id → profile_data_index.
111+ // / Built once when XRay is first activated.
112+ static uint32_t * g_xray_to_profile = nullptr ; // / array indexed by xray func_id
113+ static int32_t g_xray_max_id = 0 ;
114+ static std::once_flag g_xray_map_once;
115+
116+ // / Per-function minimum call depth (indexed by profile_data_index), reset per test.
117+ static std::atomic<uint32_t > * g_xray_min_depth = nullptr ;
118+ static uint32_t g_xray_depth_size = 0 ;
119+
120+ // / Per-thread call depth, reset when generation changes (same as before).
121+ static uint32_t g_xray_generation = 0 ;
122+ thread_local uint32_t g_tls_xray_generation = UINT32_MAX;
123+ thread_local uint32_t g_tls_xray_depth = 0 ;
124+ thread_local uint32_t g_tls_xray_baseline = 0 ;
125+
126+ static void buildXRayProfileMap () __attribute__((xray_never_instrument));
127+ static void buildXRayProfileMap ()
128+ {
129+ std::call_once (g_xray_map_once, []
130+ {
131+ const int32_t max_id = __xray_max_function_id ();
132+ if (max_id <= 0 ) return ;
133+ g_xray_max_id = max_id;
134+
135+ // / Build NameRef → profile_data_index map first.
136+ const LLVMProfileData * begin = __llvm_profile_begin_data (); // NOLINT
137+ const LLVMProfileData * end = __llvm_profile_end_data (); // NOLINT
138+ const uint32_t n = static_cast <uint32_t >(end - begin);
139+ std::unordered_map<uint64_t , uint32_t > name_hash_to_idx;
140+ name_hash_to_idx.reserve (n);
141+ for (uint32_t i = 0 ; i < n; ++i)
142+ name_hash_to_idx.emplace (begin[i].NameRef , i);
143+
144+ // / Allocate depth array.
145+ g_xray_depth_size = n;
146+ g_xray_min_depth = new std::atomic<uint32_t >[n]; // NOLINT
147+ for (uint32_t i = 0 ; i < n; ++i)
148+ g_xray_min_depth[i].store (UINT32_MAX, std::memory_order_relaxed);
149+
150+ // / For each XRay function_id, resolve to profile_data_index via symbol name hash.
151+ g_xray_to_profile = new uint32_t [static_cast <size_t >(max_id) + 1 ]; // NOLINT
152+ std::fill (g_xray_to_profile, g_xray_to_profile + max_id + 1 , UINT32_MAX);
153+
154+ for (int32_t id = 1 ; id <= max_id; ++id)
155+ {
156+ const void * addr = reinterpret_cast <const void *>(__xray_function_address (id));
157+ if (!addr) continue ;
158+ Dl_info info;
159+ if (!dladdr (addr, &info) || !info.dli_sname ) continue ;
160+ const uint64_t h = fnv64 (info.dli_sname );
161+ const auto it = name_hash_to_idx.find (h);
162+ if (it != name_hash_to_idx.end ())
163+ g_xray_to_profile[id] = it->second ;
164+ }
165+ });
166+ }
167+
168+ static void xrayHandler (int32_t func_id, XRayEntryType type) __attribute__((xray_never_instrument));
169+ static void xrayHandler (int32_t func_id, XRayEntryType type)
170+ {
171+ if (type == XRayEntryType::ENTRY || type == XRayEntryType::LOG_ARGS_ENTRY)
172+ {
173+ if (g_tls_xray_generation != g_xray_generation) [[unlikely]]
174+ {
175+ g_tls_xray_baseline = g_tls_xray_depth;
176+ g_tls_xray_generation = g_xray_generation;
177+ }
178+ const uint32_t abs = ++g_tls_xray_depth;
179+ const uint32_t rel = (abs > g_tls_xray_baseline) ? abs - g_tls_xray_baseline : 0u ;
180+ const uint32_t depth = (rel < 255u ) ? rel : 254u ;
181+
182+ if (g_xray_to_profile && func_id >= 1 && func_id <= g_xray_max_id)
183+ {
184+ const uint32_t idx = g_xray_to_profile[func_id];
185+ if (idx < g_xray_depth_size)
186+ {
187+ auto & slot = g_xray_min_depth[idx];
188+ uint32_t old = slot.load (std::memory_order_relaxed);
189+ while (old > depth && !slot.compare_exchange_weak (old, depth, std::memory_order_relaxed)) {}
190+ }
191+ }
192+ }
193+ else if (type == XRayEntryType::EXIT || type == XRayEntryType::TAIL)
194+ {
195+ --g_tls_xray_depth;
196+ }
197+ }
198+
199+ } // anonymous namespace
200+
201+ #endif // CLICKHOUSE_XRAY_INSTRUMENT_COVERAGE
202+
203+ // / Stubs for -finstrument-functions (kept for link compatibility with builds that
204+ // / add the flag without XRay).
81205extern " C" void __cyg_profile_func_enter (void *, void *) __attribute__((no_instrument_function));
82206void __cyg_profile_func_enter (void *, void *) {}
83207
@@ -121,10 +245,22 @@ std::vector<CovCounter> getCurrentCoveredNameRefs()
121245 if (*entry_counter == 0 )
122246 continue ;
123247
124- // / Store the raw entry-counter call count (capped at 254) as min_depth.
125- // / 255 is reserved as "not tracked". A lower value means the function was
126- // / called fewer times during this test → more likely a specific code path.
127- const uint8_t min_depth = static_cast <uint8_t >(std::min<uint64_t >(*entry_counter, 254u ));
248+ // / min_depth: prefer XRay call depth (exact, built via text_addr→profile mapping)
249+ // / over the call-count proxy. XRay depth is populated only when the binary is
250+ // / built with -DCLICKHOUSE_XRAY_INSTRUMENT_COVERAGE=1 and XRay is activated.
251+ uint8_t min_depth;
252+ #ifdef CLICKHOUSE_XRAY_INSTRUMENT_COVERAGE
253+ if (g_xray_min_depth && idx < g_xray_depth_size)
254+ {
255+ const uint32_t xray_d = g_xray_min_depth[idx].load (std::memory_order_relaxed);
256+ min_depth = (xray_d < 255u ) ? static_cast <uint8_t >(xray_d) : 255u ;
257+ }
258+ else
259+ #endif
260+ {
261+ // / Fallback: raw entry-counter call count (lower = more specific).
262+ min_depth = static_cast <uint8_t >(std::min<uint64_t >(*entry_counter, 254u ));
263+ }
128264
129265 // / Emit one entry per non-zero counter. Counter 0 is the function entry;
130266 // / counters 1…N are individual basic-block/branch counters that map to
@@ -140,6 +276,72 @@ std::vector<CovCounter> getCurrentCoveredNameRefs()
140276}
141277
142278
279+ // / LLVM value-profiling node: records one (value, count) pair for indirect calls.
280+ // / Matches compiler-rt's ValueProfNode layout.
281+ struct ValueProfNode
282+ {
283+ uint64_t value; // / Absolute runtime address of the callee
284+ uint64_t count;
285+ ValueProfNode * next;
286+ };
287+
288+ std::vector<IndirectCallEntry> getCurrentIndirectCalls ()
289+ {
290+ std::vector<IndirectCallEntry> result;
291+
292+ // / Compute load base from /proc/self/maps: the r-xp mapping for the main binary.
293+ // / We use this to turn absolute callee addresses into load-relative offsets that
294+ // / are stable across ASLR restarts (same binary = same offset, different run = same value).
295+ uintptr_t load_base = 0 ;
296+ if (FILE * f = std::fopen (" /proc/self/maps" , " r" ))
297+ {
298+ char line[512 ];
299+ while (std::fgets (line, sizeof (line), f))
300+ {
301+ uintptr_t start = 0 , end = 0 , offset = 0 ;
302+ char perms[8 ] = {};
303+ // Parse: start-end perms offset dev inode path
304+ if (std::sscanf (line, " %zx-%zx %4s %zx" , &start, &end, perms, &offset) == 4
305+ && perms[0 ] == ' r' && perms[2 ] == ' x' && offset == 0 )
306+ {
307+ load_base = start;
308+ break ;
309+ }
310+ }
311+ std::fclose (f);
312+ }
313+
314+ const LLVMProfileData * begin = __llvm_profile_begin_data (); // NOLINT
315+ const LLVMProfileData * end = __llvm_profile_end_data (); // NOLINT
316+
317+ for (const LLVMProfileData * data = begin; data != end; ++data)
318+ {
319+ // / IPVK_IndirectCallTarget is kind 0; NumValueSites[0] is the number of
320+ // / indirect-call sites instrumented in this function.
321+ if (data->NumValueSites [0 ] == 0 || !data->Values )
322+ continue ;
323+
324+ // / Values → array of ValueProfNode* heads, one per indirect-call site.
325+ auto * const sites = static_cast <ValueProfNode * const *>(data->Values );
326+
327+ for (uint16_t s = 0 ; s < data->NumValueSites [0 ]; ++s)
328+ {
329+ for (const ValueProfNode * node = sites[s]; node; node = node->next )
330+ {
331+ if (node->count == 0 || node->value == 0 )
332+ continue ;
333+ const uint64_t offset = (load_base && node->value > load_base)
334+ ? node->value - load_base
335+ : node->value ;
336+ result.push_back ({data->NameRef , data->FuncHash , offset, node->count });
337+ }
338+ }
339+ }
340+
341+ return result;
342+ }
343+
344+
143345void registerCoverageFlushCallback (CoverageFlushCallback cb)
144346{
145347 std::lock_guard lock (g_coverage_mutex);
@@ -153,28 +355,50 @@ void setCoverageTest(std::string_view test_name)
153355 // / the callback (which may do heavy DB work and must not hold the mutex).
154356 std::string prev_test_name;
155357 std::vector<CovCounter> name_refs;
358+ std::vector<IndirectCallEntry> indirect_calls;
156359 CoverageFlushCallback cb;
157360
158361 {
159362 std::lock_guard lock (g_coverage_mutex);
160363
161364 if (!g_current_test_name.empty () && g_flush_callback)
162365 {
163- // / Collect covered NameRefs before resetting counters.
164- name_refs = getCurrentCoveredNameRefs ();
366+ // / Collect covered NameRefs and indirect-call observations before reset.
367+ name_refs = getCurrentCoveredNameRefs ();
368+ indirect_calls = getCurrentIndirectCalls ();
165369 prev_test_name = g_current_test_name;
166370 cb = g_flush_callback;
167371 }
168372
169373 __llvm_profile_reset_counters (); // NOLINT
170374
375+ #ifdef CLICKHOUSE_XRAY_INSTRUMENT_COVERAGE
376+ // / Bump XRay generation so every thread resets its depth baseline on next call.
377+ ++g_xray_generation;
378+ // / Reset per-function min-depth array.
379+ for (uint32_t i = 0 ; i < g_xray_depth_size; ++i)
380+ g_xray_min_depth[i].store (UINT32_MAX, std::memory_order_relaxed);
381+ if (!g_current_test_name.empty ())
382+ {
383+ // / Deactivate XRay after collecting the previous test's data.
384+ __xray_unpatch ();
385+ }
386+ if (!test_name.empty ())
387+ {
388+ // / Build the XRay→profile map lazily on first test start.
389+ buildXRayProfileMap ();
390+ __xray_set_handler (xrayHandler);
391+ __xray_patch ();
392+ }
393+ #endif
394+
171395 g_current_test_name = std::string (test_name);
172396 }
173397
174398 // / Invoke the callback outside the lock so the DB layer can look up
175399 // / source locations and insert into system.coverage_log without risking deadlock.
176400 if (cb)
177- cb (prev_test_name, name_refs);
401+ cb (prev_test_name, name_refs, indirect_calls );
178402}
179403
180404void resetCoverage ()
0 commit comments