From 8f9cb1e097b5e8cb4dd8704febd854a93a3ad19c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 2 Dec 2025 16:55:07 -0800 Subject: [PATCH 1/6] snooper: demonstrate task_work and stack unwinding WITH task_work_schedule_signal(): $ sudo ./snooper `pidof app` Snooping on tasks for PID 5331... Task: app_thread (PID=5331, TID=5332) No kernel stack User stack: 00007fbc700b90c2: random @ 0x42080+0x42 00007fbc700b8e7d: rand @ 0x41e70+0xd 00000000004012df: func_b @ 0x40129a+0x45 0000000000401283: func_a @ 0x4011f2+0x91 0000000000401476: thread_func @ 0x4013f6+0x80 00007fbc7010b39d: 00007fbc7019049c: Task: app (PID=5331, TID=5331) Kernel stack: ffffffff813e7e49: hrtimer_nanosleep @ 0xffffffff813e7db0+0x99 ffffffff813f1c73: common_nsleep @ 0xffffffff813f1c30+0x43 ffffffff813f2e0d: __x64_sys_clock_nanosleep @ 0xffffffff813f2d50+0xbd ffffffff8241a09a: do_syscall_64 @ 0xffffffff8241a030+0x6a ffffffff810000b0: entry_SYSCALL_64_after_hwframe @ 0xffffffff81000065+0x4b User stack: 00007fbc70158733: clock_nanosleep @ 0xe16d0+0x63 00007fbc70164827: nanosleep @ 0xed810+0x17 00007fbc70176f41: sleep @ 0xfff00+0x41 0000000000401622: main @ 0x40153c+0xe6 00007fbc7009ce08: 00007fbc7009cecc: __libc_start_main @ 0x25e40+0x8c 0000000000401105: _start @ 0x4010e0+0x25 WITHOUT task_work_schedule_signal(), a correct stack trace CANNOT be obtained, even after multiple tries: $ sudo ./snooper `pidof app` Snooping on tasks for PID 5004... Task: app_thread (PID=5004, TID=5005) No kernel stack User stack (frame pointer unwinding): 00000000004011fd: func_mux @ 0x4011f2+0xb 15c15fd673415a00: $ sudo ./snooper `pidof app` Snooping on tasks for PID 5004... Task: app_thread (PID=5004, TID=5005) No kernel stack User stack (frame pointer unwinding): 00007fcbaf2490a8: random @ 0x42080+0x28 000062cbaf248e7d: 0000000000401269: func_mux @ 0x4011f2+0x77 00000000004012a6: func_a @ 0x40126e+0x38 000000000040124b: func_mux @ 0x4011f2+0x59 15c15fd673415a00: Note that 15c15fd673415a00 is NOT a valid address: it is not a canonical x86-64 address, so it cannot be a real return address.
Signed-off-by: Andrii Nakryiko --- examples/c/.gitignore | 2 + examples/c/Makefile | 2 +- examples/c/snooper.bpf.c | 141 ++++++++++++++++++++++++ examples/c/snooper.c | 229 +++++++++++++++++++++++++++++++++++++++ examples/c/snooper.h | 26 +++++ 5 files changed, 399 insertions(+), 1 deletion(-) create mode 100644 examples/c/snooper.bpf.c create mode 100644 examples/c/snooper.c create mode 100644 examples/c/snooper.h diff --git a/examples/c/.gitignore b/examples/c/.gitignore index 9edf6d75..59362939 100644 --- a/examples/c/.gitignore +++ b/examples/c/.gitignore @@ -16,3 +16,5 @@ /lsm /cmake-build-debug/ /cmake-build-release/ +/snooper +compile_commands.json diff --git a/examples/c/Makefile b/examples/c/Makefile index 912b4e5e..4da26994 100644 --- a/examples/c/Makefile +++ b/examples/c/Makefile @@ -31,7 +31,7 @@ CARGO ?= $(shell which cargo) ifeq ($(strip $(CARGO)),) BZS_APPS := else -BZS_APPS := profile +BZS_APPS := profile snooper APPS += $(BZS_APPS) # Required by libblazesym ALL_LDFLAGS += -lrt -ldl -lpthread -lm diff --git a/examples/c/snooper.bpf.c b/examples/c/snooper.bpf.c new file mode 100644 index 00000000..a182b095 --- /dev/null +++ b/examples/c/snooper.bpf.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* Copyright (c) 2025 Meta Platforms, Inc. */ +#include "vmlinux.h" +#include +#include +#include + +#include "snooper.h" + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; + +struct task_state { + struct task_event event; + struct bpf_task_work tw; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 4096); + __type(key, __u32); + __type(value, struct task_state); +} task_states SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 1024 * 1024); +} rb SEC(".maps"); + +/* + * Frame pointer-based user stack unwinding. 
+ * + * On x86_64 with frame pointers enabled (-fno-omit-frame-pointer): + * [rbp + 0] = saved rbp (previous frame pointer) + * [rbp + 8] = return address + * + * We walk the chain of frame pointers to collect return addresses. + */ +static int unwind_user_stack(struct task_struct *task, __u64 *stack, int max_depth) +{ + struct pt_regs *regs; + struct frame { + __u64 next_fp; /* saved frame pointer (rbp) */ + __u64 ret_addr; /* return address */ + } frame; + __u64 fp; + unsigned i = 0; + + regs = bpf_core_cast((void *)bpf_task_pt_regs(task), struct pt_regs); + if (!(regs->cs & 3)) + return 0; /* not in user space mode */ + + stack[0] = regs->ip; + + fp = regs->bp; + bpf_for(i, 1, MAX_STACK_DEPTH) { + /* read the frame, [fp] = next_fp, [fp+8] = ret_addr */ + if (bpf_copy_from_user_task(&frame, sizeof(frame), (void *)fp, task, 0)) + break; + + barrier_var(i); + if (i < MAX_STACK_DEPTH) + stack[i] = frame.ret_addr; + + fp = frame.next_fp; + } + + return i * sizeof(__u64); +} + +static int task_work_cb(struct bpf_map *map, void *key, void *value) +{ + struct task_struct *task = bpf_get_current_task_btf(); + struct task_state *state = value; + struct task_event *event = &state->event; + __u32 tid = task->pid; + + if (event->tid != task->pid) { + bpf_printk("MISMATCHED PID %d != expected %d", task->pid, event->tid); + goto cleanup; + } + + event->ustack_sz = unwind_user_stack(task, event->ustack, MAX_STACK_DEPTH); + + bpf_ringbuf_output(&rb, event, sizeof(*event), 0); + +cleanup: + bpf_map_delete_elem(&task_states, key); + return 0; +} + +/* + * THIS DOESN'T CURRENTLY WORK: + * static struct task_state empty_state; + * + * Verifier will complain: + * bpf_task_work cannot be accessed directly by load/store + */ +static char empty_state[sizeof(struct task_state)]; + +SEC("iter.s/task") +int snoop_tasks(struct bpf_iter__task *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct task_struct *task = ctx->task; + struct task_state *state; + struct task_event *event; + 
__u32 tid; + int err; + + if (!task) + return 0; + + tid = task->pid; + + err = bpf_map_update_elem(&task_states, &tid, &empty_state, BPF_NOEXIST); + if (err) { + bpf_printk("Unexpected error adding task state for %d (%s): %d", tid, task->comm, err); + return 0; + } + state = bpf_map_lookup_elem(&task_states, &tid); + if (!state) { + bpf_printk("Unexpected error fetching task state for %d (%s): %d", tid, task->comm, err); + return 0; + } + + event = &state->event; + event->pid = task->tgid; + event->tid = task->pid; + bpf_probe_read_kernel_str(event->comm, TASK_COMM_LEN, task->comm); + + event->kstack_sz = bpf_get_task_stack(task, event->kstack, sizeof(event->kstack), 0); + + err = bpf_task_work_schedule_signal_impl(task, &state->tw, &task_states, task_work_cb, NULL); + if (err) { + bpf_printk("Unexpected error scheduling task work %d (%s): %d", tid, task->comm, err); + bpf_map_delete_elem(&task_states, &tid); + return 0; + } + + return 0; +} diff --git a/examples/c/snooper.c b/examples/c/snooper.c new file mode 100644 index 00000000..1918a08c --- /dev/null +++ b/examples/c/snooper.c @@ -0,0 +1,229 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2025 Meta Platforms, Inc. 
*/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "snooper.skel.h" +#include "snooper.h" +#include "blazesym.h" + +static struct blaze_symbolizer *symbolizer; +static volatile bool exiting = false; + +static void sig_handler(int sig) +{ + exiting = true; +} + +static void print_frame(const char *name, uintptr_t input_addr, uintptr_t addr, + uint64_t offset, const blaze_symbolize_code_info* code_info) +{ + if (input_addr != 0) { + printf(" %016lx: %s @ 0x%lx+0x%lx", input_addr, name, addr, offset); + if (code_info != NULL && code_info->dir != NULL && code_info->file != NULL) { + printf(" %s/%s:%u\n", code_info->dir, code_info->file, code_info->line); + } else if (code_info != NULL && code_info->file != NULL) { + printf(" %s:%u\n", code_info->file, code_info->line); + } else { + printf("\n"); + } + } else { + printf(" %16s %s", "", name); + if (code_info != NULL && code_info->dir != NULL && code_info->file != NULL) { + printf("@ %s/%s:%u [inlined]\n", code_info->dir, code_info->file, code_info->line); + } else if (code_info != NULL && code_info->file != NULL) { + printf("@ %s:%u [inlined]\n", code_info->file, code_info->line); + } else { + printf("[inlined]\n"); + } + } +} + +static void show_stack_trace(__u64 *stack, int stack_sz, pid_t pid) +{ + const struct blaze_symbolize_inlined_fn* inlined; + const struct blaze_syms *syms; + const struct blaze_sym *sym; + int i, j; + + assert(sizeof(uintptr_t) == sizeof(uint64_t)); + + if (pid) { + struct blaze_symbolize_src_process src = { + .type_size = sizeof(src), + .pid = pid, + }; + + syms = blaze_symbolize_process_abs_addrs(symbolizer, &src, + (const uintptr_t *)stack, stack_sz); + } else { + struct blaze_symbolize_src_kernel src = { + .type_size = sizeof(src), + }; + + syms = blaze_symbolize_kernel_abs_addrs(symbolizer, &src, + (const uintptr_t *)stack, stack_sz); + } + + if (!syms) { + printf(" failed to symbolize addresses: %s\n", 
blaze_err_str(blaze_err_last())); + return; + } + + for (i = 0; i < stack_sz; i++) { + if (!syms || syms->cnt <= i || syms->syms[i].name == NULL) { + printf(" %016llx: \n", stack[i]); + continue; + } + + sym = &syms->syms[i]; + print_frame(sym->name, stack[i], sym->addr, sym->offset, &sym->code_info); + + for (j = 0; j < sym->inlined_cnt; j++) { + inlined = &sym->inlined[j]; + print_frame(inlined->name, 0, 0, 0, &inlined->code_info); + } + } + + blaze_syms_free(syms); +} + +/* Ringbuf callback for task events */ +static int handle_event(void *ctx, void *data, size_t size) +{ + struct task_event *event = data; + + printf("Task: %s (PID=%d, TID=%d)\n", event->comm, event->pid, event->tid); + + /* Show kernel stack trace */ + if (event->kstack_sz > 0) { + printf(" Kernel stack:\n"); + show_stack_trace(event->kstack, event->kstack_sz / sizeof(__u64), 0); + } else if (event->kstack_sz < 0) { + printf(" Kernel stack error: %d\n", event->kstack_sz); + } else { + printf(" No kernel stack\n"); + } + + /* Show user stack trace */ + if (event->ustack_sz > 0) { + printf(" User stack:\n"); + show_stack_trace(event->ustack, event->ustack_sz / sizeof(__u64), event->pid); + } else if (event->ustack_sz < 0) { + printf(" User stack error: %d\n", event->ustack_sz); + } else { + printf(" No user stack\n"); + } + + printf("\n"); + return 0; +} + +static void show_help(const char *progname) +{ + printf("Usage: %s \n", progname); + printf(" PID Process ID to filter tasks (required)\n"); +} + +int main(int argc, char **argv) +{ + struct ring_buffer *rb = NULL; + struct snooper_bpf *skel = NULL; + LIBBPF_OPTS(bpf_iter_attach_opts, opts); + union bpf_iter_link_info linfo; + pid_t pid_filter = 0; + int iter_fd = -1; + int err = 0; + char dummy; + + if (argc < 2) { + show_help(argv[0]); + return 1; + } + + errno = 0; + pid_filter = (pid_t)strtol(argv[1], NULL, 10); + err = -errno; + if (err != 0 || pid_filter <= 0) { + fprintf(stderr, "Failed to parse PID '%s'\n", argv[1]); + 
show_help(argv[0]); + return 1; + } + + signal(SIGINT, sig_handler); + signal(SIGTERM, sig_handler); + + skel = snooper_bpf__open_and_load(); + if (!skel) { + fprintf(stderr, "Failed to open and load BPF skeleton\n"); + err = -1; + goto cleanup; + } + + symbolizer = blaze_symbolizer_new(); + if (!symbolizer) { + fprintf(stderr, "Failed to create symbolizer\n"); + err = -1; + goto cleanup; + } + + rb = ring_buffer__new(bpf_map__fd(skel->maps.rb), handle_event, NULL, NULL); + if (!rb) { + fprintf(stderr, "Failed to create ring buffer\n"); + err = -1; + goto cleanup; + } + + memset(&linfo, 0, sizeof(linfo)); + linfo.task.pid = pid_filter; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + + skel->links.snoop_tasks = bpf_program__attach_iter(skel->progs.snoop_tasks, &opts); + if (!skel->links.snoop_tasks) { + err = -errno; + fprintf(stderr, "Failed to attach BPF iterator\n"); + goto cleanup; + } + + iter_fd = bpf_iter_create(bpf_link__fd(skel->links.snoop_tasks)); + if (iter_fd < 0) { + err = -errno; + fprintf(stderr, "Failed to create iterator\n"); + goto cleanup; + } + + printf("Snooping on tasks for PID %d...\n\n", pid_filter); + + /* trigger task iterator program */ + while (read(iter_fd, &dummy, sizeof(dummy)) > 0) { + /* nothing */ + } + + while (!exiting) { + err = ring_buffer__poll(rb, 100 /* timeout */); + if (err < 0 && err != -EINTR) { + fprintf(stderr, "Error polling ring buffer: %d\n", err); + break; + } + if (err == 0) + break; + } + +cleanup: + if (iter_fd >= 0) + close(iter_fd); + ring_buffer__free(rb); + snooper_bpf__destroy(skel); + blaze_symbolizer_free(symbolizer); + + return err < 0 ? -err : 0; +} diff --git a/examples/c/snooper.h b/examples/c/snooper.h new file mode 100644 index 00000000..f378149f --- /dev/null +++ b/examples/c/snooper.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +/* Copyright (c) 2025 Meta Platforms, Inc. 
*/ +#ifndef __SNOOPER_H_ +#define __SNOOPER_H_ + +#ifndef TASK_COMM_LEN +#define TASK_COMM_LEN 16 +#endif + +#ifndef MAX_STACK_DEPTH +#define MAX_STACK_DEPTH 128 +#endif + +typedef __u64 stack_trace_t[MAX_STACK_DEPTH]; + +struct task_event { + __u32 pid; + __u32 tid; + char comm[TASK_COMM_LEN]; + __s32 kstack_sz; + __s32 ustack_sz; + stack_trace_t kstack; + stack_trace_t ustack; +}; + +#endif /* __SNOOPER_H_ */ From d44b3833989174d41e0b4a9a90c0bc49498521e4 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 2 Dec 2025 17:14:41 -0800 Subject: [PATCH 2/6] snooper: Add testing "app" for task_work-based demo Signed-off-by: Andrii Nakryiko --- examples/c/.gitignore | 4 +- examples/c/Makefile | 13 +++- examples/c/app.c | 138 ++++++++++++++++++++++++++++++++++++++++++ examples/c/app_lib.c | 5 ++ examples/c/app_lib.h | 7 +++ 5 files changed, 164 insertions(+), 3 deletions(-) create mode 100644 examples/c/app.c create mode 100644 examples/c/app_lib.c create mode 100644 examples/c/app_lib.h diff --git a/examples/c/.gitignore b/examples/c/.gitignore index 59362939..4490adc1 100644 --- a/examples/c/.gitignore +++ b/examples/c/.gitignore @@ -16,5 +16,7 @@ /lsm /cmake-build-debug/ /cmake-build-release/ -/snooper compile_commands.json +/app +/libapp.so +/snooper diff --git a/examples/c/Makefile b/examples/c/Makefile index 4da26994..13584c34 100644 --- a/examples/c/Makefile +++ b/examples/c/Makefile @@ -70,12 +70,12 @@ $(call allow-override,CC,$(CROSS_COMPILE)cc) $(call allow-override,LD,$(CROSS_COMPILE)ld) .PHONY: all -all: $(APPS) +all: $(APPS) app .PHONY: clean clean: $(call msg,CLEAN) - $(Q)rm -rf $(OUTPUT) $(APPS) + $(Q)rm -rf $(OUTPUT) $(APPS) app libapp.so $(OUTPUT) $(OUTPUT)/libbpf $(BPFTOOL_OUTPUT): $(call msg,MKDIR,$@) @@ -136,3 +136,12 @@ $(APPS): %: $(OUTPUT)/%.o $(LIBBPF_OBJ) | $(OUTPUT) # keep intermediate (.skel.h, .bpf.o, etc) targets .SECONDARY: + +# Build target app and its shared library +libapp.so: app_lib.c app_lib.h + $(call msg,SHLIB,$@) + $(Q)$(CC) 
$(CFLAGS) -shared -fPIC -o $@ app_lib.c + +app: app.c app_lib.h libapp.so + $(call msg,BINARY,$@) + $(Q)$(CC) $(CFLAGS) -o $@ app.c -L. -lapp -Wl,-rpath,'$$ORIGIN' -lpthread diff --git a/examples/c/app.c b/examples/c/app.c new file mode 100644 index 00000000..ba837120 --- /dev/null +++ b/examples/c/app.c @@ -0,0 +1,138 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "app_lib.h" + +static __thread int tls_dont_care; /* just to avoid zero offsets everywhere else */ + +__thread int tls_exec; +extern __thread int tls_shared; + +static __thread int tls_local_exec; + +int __attribute__((weak)) get_tls_exec(void) +{ + return tls_exec; +} + +int __attribute__((weak)) get_tls_shared(void) +{ + return tls_shared; +} + +int __attribute__((weak)) get_tls_local_exec(void) +{ + return tls_local_exec; +} + +/* Forward declarations for recursive functions */ +void func_a(int depth); +void func_b(int depth); +void func_c(int depth); + +static __always_inline void func_mux(int depth) +{ + if (depth <= 0) + return; + + switch (rand() % 3) { + case 0: func_a(depth - 1); break; + case 1: func_b(depth - 1); break; + case 2: func_c(depth - 1); break; + } +} + +void func_a(int depth) +{ + volatile char stack_space[120]; + stack_space[119] = 'a'; + stack_space[0] += 1; + + if (depth <= 0) + return; + + func_mux(depth - 1); +} + +void func_b(int depth) +{ + volatile char stack_space[350]; + stack_space[349] = 'b'; + stack_space[0] += 1; + + if (depth <= 0) + return; + + func_mux(depth - 1); +} + +void func_c(int depth) +{ + volatile char stack_space[800]; + stack_space[799] = 'c'; + stack_space[0] += 1; + + if (depth <= 0) + return; + + func_mux(depth - 1); +} + +static void *thread_func(void *arg) +{ + time_t last_print = 0; + (void)arg; + + pthread_setname_np(pthread_self(), "app_thread"); + + while (1) { + time_t now; + + func_mux(10); + + now = time(NULL); + if (now > last_print) { 
+ tls_exec += 4; + tls_shared += 8; + tls_local_exec += 16; + bump_tls_local_shared(); + bump_tls_local_shared(); + + printf("Hello from thread (exec=%d, shared=%d, local_exec=%d, local_shared=%d)!\n", + get_tls_exec(), get_tls_shared(), get_tls_local_exec(), get_tls_local_shared()); + last_print = now; + } + } + + return NULL; +} + +int main() { + pthread_t thread; + + pthread_create(&thread, NULL, thread_func, NULL); + + while (1) { + tls_dont_care += 1; + tls_exec += 2; + tls_shared += 4; + tls_local_exec += 8; + bump_tls_local_shared(); + + printf("Hello from app (exec=%d, shared=%d, local_exec=%d, local_shared=%d)!\n", + get_tls_exec(), get_tls_shared(), get_tls_local_exec(), get_tls_local_shared()); + sleep(1); + } + + return 0; +} diff --git a/examples/c/app_lib.c b/examples/c/app_lib.c new file mode 100644 index 00000000..e9a04f39 --- /dev/null +++ b/examples/c/app_lib.c @@ -0,0 +1,5 @@ +__thread int tls_shared; +static __thread int tls_local_shared; + +int get_tls_local_shared(void) { return tls_local_shared; } +void bump_tls_local_shared(void) { tls_local_shared += 16; } diff --git a/examples/c/app_lib.h b/examples/c/app_lib.h new file mode 100644 index 00000000..989764b7 --- /dev/null +++ b/examples/c/app_lib.h @@ -0,0 +1,7 @@ +#ifndef APP_LIB_H +#define APP_LIB_H + +int get_tls_local_shared(void); +void bump_tls_local_shared(void); + +#endif /* APP_LIB_H */ From 3cbaa371f0231b38a6f415212528b64f9eea5c92 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 4 Dec 2025 14:34:22 -0800 Subject: [PATCH 3/6] snooper: add task's VMA iteration and ELF symbol parsing Signed-off-by: Andrii Nakryiko --- examples/c/snooper.bpf.c | 364 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 355 insertions(+), 9 deletions(-) diff --git a/examples/c/snooper.bpf.c b/examples/c/snooper.bpf.c index a182b095..99877ef9 100644 --- a/examples/c/snooper.bpf.c +++ b/examples/c/snooper.bpf.c @@ -9,6 +9,42 @@ char LICENSE[] SEC("license") = "Dual BSD/GPL"; +/* Error codes - 
can't include errno.h in BPF */ +#define ENOENT 2 +#define EOPNOTSUPP 95 +#define EPROTO 71 + +extern int bpf_dynptr_from_file(struct file *file, u32 flags, struct bpf_dynptr *ptr__uninit) __ksym __weak; +extern int bpf_dynptr_file_discard(struct bpf_dynptr *dynptr) __ksym __weak; + +/* ========== ELF constants ========== */ + +#define ELFMAG0 0x7f +#define ELFMAG1 'E' +#define ELFMAG2 'L' +#define ELFMAG3 'F' + +#define ELFCLASS64 2 +#define EI_CLASS 4 + +#define SHT_SYMTAB 2 +#define SHT_STRTAB 3 +#define SHT_DYNSYM 11 + +#define STT_NOTYPE 0 +#define STT_OBJECT 1 +#define STT_FUNC 2 +#define STT_SECTION 3 +#define STT_FILE 4 +#define STT_COMMON 5 +#define STT_TLS 6 +#define ELF64_ST_TYPE(info) ((info) & 0xf) + +#define VM_EXEC 0x00000004 + +#define SHN_XINDEX 0xffff +#define MAX_SYM_NAME 64 + struct task_state { struct task_event event; struct bpf_task_work tw; @@ -17,7 +53,7 @@ struct task_state { struct { __uint(type, BPF_MAP_TYPE_HASH); __uint(max_entries, 4096); - __type(key, __u32); + __type(key, u32); __type(value, struct task_state); } task_states SEC(".maps"); @@ -26,6 +62,40 @@ struct { __uint(max_entries, 1024 * 1024); } rb SEC(".maps"); +struct elf_symtab { + u32 shndx; + u32 symtab_cnt; + u64 symtab_off; + u64 strtab_off; +}; + +struct elf { + u64 shoff; /* section headers list offset */ + u32 shnum; /* number of sections */ + + struct elf_symtab symtab, dynsym; +}; + +struct scratch { + struct elf elf; + + struct elf64_hdr ehdr; + struct elf64_shdr shdr; + struct elf64_shdr strtab_shdr; + + struct elf64_sym sym; + char sym_name[MAX_SYM_NAME]; +}; + +static int zero = 0; + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, u32); + __type(value, struct scratch); +} scratch_map SEC(".maps"); + /* * Frame pointer-based user stack unwinding. * @@ -35,14 +105,14 @@ struct { * * We walk the chain of frame pointers to collect return addresses. 
*/ -static int unwind_user_stack(struct task_struct *task, __u64 *stack, int max_depth) +static int unwind_user_stack(struct task_struct *task, u64 *stack, int max_depth) { struct pt_regs *regs; struct frame { - __u64 next_fp; /* saved frame pointer (rbp) */ - __u64 ret_addr; /* return address */ + u64 next_fp; /* saved frame pointer (rbp) */ + u64 ret_addr; /* return address */ } frame; - __u64 fp; + u64 fp; unsigned i = 0; regs = bpf_core_cast((void *)bpf_task_pt_regs(task), struct pt_regs); @@ -64,7 +134,282 @@ static int unwind_user_stack(struct task_struct *task, __u64 *stack, int max_dep fp = frame.next_fp; } - return i * sizeof(__u64); + return i * sizeof(u64); +} + +static int parse_elf(struct bpf_dynptr *fdptr, struct elf *elf, struct scratch *s) +{ + int err, i; + + /* ELF header */ + err = bpf_dynptr_read(&s->ehdr, sizeof(s->ehdr), fdptr, 0, 0); + if (err) { + bpf_printk(" [ELF] Failed to read ELF header: %d", err); + return err; + } + + /* Verify ELF magic */ + if (s->ehdr.e_ident[0] != ELFMAG0 || s->ehdr.e_ident[1] != ELFMAG1 || + s->ehdr.e_ident[2] != ELFMAG2 || s->ehdr.e_ident[3] != ELFMAG3) { + bpf_printk(" [ELF] Not an ELF file"); + return -EPROTO; + } + + /* Only support 64-bit ELF for now */ + if (s->ehdr.e_ident[EI_CLASS] != ELFCLASS64) { + bpf_printk(" [ELF] Not 64-bit ELF"); + return -EOPNOTSUPP; + } + + elf->shoff = s->ehdr.e_shoff; + elf->shnum = s->ehdr.e_shnum; + + //bpf_printk(" [ELF] Section headers: off=%llu, num=%u", elf->shoff, elf->shnum); + if (elf->shnum == 0 || elf->shnum >= SHN_XINDEX) + return -EOPNOTSUPP; + + elf->symtab.shndx = 0; + elf->dynsym.shndx = 0; + + bpf_for(i, 1, elf->shnum) { + u64 symtab_off, symtab_size, strtab_shdr_off; + u32 symtab_entsize, strtab_idx; + u64 shdr_off = elf->shoff + i * sizeof(struct elf64_shdr); + + err = bpf_dynptr_read(&s->shdr, sizeof(s->shdr), fdptr, shdr_off, 0); + if (err) { + bpf_printk(" [ELF] Failed to read shdr[%d]: %d", i, err); + break; + } + + if (s->shdr.sh_type != SHT_SYMTAB && 
s->shdr.sh_type != SHT_DYNSYM) + continue; + + symtab_off = s->shdr.sh_offset; + symtab_size = s->shdr.sh_size; + symtab_entsize = s->shdr.sh_entsize ?: sizeof(struct elf64_sym); + + /* sh_link points to the associated string table */ + strtab_idx = s->shdr.sh_link; + strtab_shdr_off = elf->shoff + strtab_idx * sizeof(struct elf64_shdr); + err = bpf_dynptr_read(&s->strtab_shdr, sizeof(s->strtab_shdr), fdptr, strtab_shdr_off, 0); + if (err) { + bpf_printk(" [ELF] Failed to read strtab shdr[%d]: %d", strtab_idx, err); + return err; + } + + //bpf_printk(" [ELF] Found %s: off=%llu, cnt=%llu", + // s->shdr.sh_type == SHT_SYMTAB ? ".symtab" : ".dynsym", + // symtab_off, symtab_size / symtab_entsize); + + if (s->shdr.sh_type == SHT_SYMTAB) { + elf->symtab.shndx = i; + elf->symtab.symtab_off = symtab_off; + elf->symtab.symtab_cnt = symtab_size / symtab_entsize; + elf->symtab.strtab_off = s->strtab_shdr.sh_offset; + } else { + elf->dynsym.shndx = i; + elf->dynsym.symtab_off = symtab_off; + elf->dynsym.symtab_cnt = symtab_size / symtab_entsize; + elf->dynsym.strtab_off = s->strtab_shdr.sh_offset; + } + + if (elf->dynsym.shndx && elf->symtab.shndx) + break; + } + + return 0; +} + +static const char *sym_type_str(u8 type) +{ + switch (type) { + case STT_NOTYPE: return "NOTYPE"; + case STT_OBJECT: return "OBJECT"; + case STT_FUNC: return "FUNC"; + case STT_SECTION: return "SECTION"; + case STT_FILE: return "FILE"; + case STT_COMMON: return "COMMON"; + case STT_TLS: return "TLS"; + default: return "UNKNOWN"; + } +} + +static int find_sym(struct bpf_dynptr *fdptr, struct elf_symtab *symtab, + const char *sym_name, int sym_type, + struct scratch *s) +{ + int err, i, j; + + if (!symtab->shndx) + return -ENOENT; + + bpf_for(i, 1, symtab->symtab_cnt) { + u64 sym_off = symtab->symtab_off + i * sizeof(struct elf64_sym); + u8 type; + bool match; + + err = bpf_dynptr_read(&s->sym, sizeof(s->sym), fdptr, sym_off, 0); + if (err) + return err; + + /* skip anonymous or external symbols */ + 
if (s->sym.st_name == 0 || s->sym.st_shndx == 0) + continue; + + type = ELF64_ST_TYPE(s->sym.st_info); + if (sym_type && type != sym_type) + continue; + + err = bpf_dynptr_read(s->sym_name, sizeof(s->sym_name), fdptr, + symtab->strtab_off + s->sym.st_name, 0); + if (err) + return err; + s->sym_name[sizeof(s->sym_name) - 1] = '\0'; + + if (bpf_strcmp(s->sym_name, sym_name) != 0) + continue; + + return i; + } + + return -ENOENT; +} + +/* + * Iterate symbols from a symbol table and print all symbols. + */ +static void print_symtab(struct bpf_dynptr *fdptr, struct elf_symtab *symtab, + const char *name, struct scratch *s) +{ + int err, i; + + if (!symtab->shndx) + return; + + bpf_printk(" [ELF] Parsing %s (%u symbols):", name, symtab->symtab_cnt); + bpf_for(i, 1, symtab->symtab_cnt) { + u64 sym_off = symtab->symtab_off + i * sizeof(struct elf64_sym); + u8 sym_type; + + err = bpf_dynptr_read(&s->sym, sizeof(s->sym), fdptr, sym_off, 0); + if (err) + break; + + if (s->sym.st_name == 0) + continue; + + /* Skip undefined symbols (external references) */ + if (s->sym.st_shndx == 0) + continue; + + err = bpf_dynptr_read(s->sym_name, sizeof(s->sym_name), fdptr, + symtab->strtab_off + s->sym.st_name, 0); + if (err) { + bpf_printk(" [SYM] Failed to read symbol #%d: %d\n", i, err); + break; + } + s->sym_name[sizeof(s->sym_name) - 1] = '\0'; + + sym_type = ELF64_ST_TYPE(s->sym.st_info); + + bpf_printk(" [SYM] 0x%llx %s %s", s->sym.st_value, sym_type_str(sym_type), s->sym_name); + } +} + +/* + * Parse ELF file and print all symbols using bpf_printk. + */ +static void parse_elf_symbols(struct bpf_dynptr *fdptr, struct elf *elf, struct scratch *s) +{ + print_symtab(fdptr, &elf->symtab, ".symtab", s); + print_symtab(fdptr, &elf->dynsym, ".dynsym", s); +} + +int MINUS_ONE = -1; + +/* + * Iterate VMAs of the current task, find executable file-backed VMAs, + * and parse their ELF symbols. 
+ */ +static int enumerate_vmas(struct task_struct *task) +{ + struct vm_area_struct *vma; + struct scratch *s; + u64 last_ino = MINUS_ONE; + int err; + + s = bpf_map_lookup_elem(&scratch_map, &zero); + if (!s) + return 0; /* can't happen */ + + bpf_printk("[VMA] Enumerating VMAs for task %d (%s)", task->pid, task->comm); + + bpf_for_each(task_vma, vma, task, 0) { + struct bpf_dynptr fdptr; + struct inode *inode; + struct file *file; + + if (!(vma->vm_flags & VM_EXEC)) + continue; + + file = vma->vm_file; + if (!file) + continue; + inode = file->f_inode; + if (!inode) + continue; + + /* + * This is a cheap and effective way to minimize reparsing of the same ELF, but + * it doesn't guarantee that each unique inode will be processed just once. This + * is acceptable for an example, though. + */ + u64 ino = inode->i_ino; + if (last_ino == ino) + continue; + + const char *vma_name = (const char *)file->f_path.dentry->d_name.name; + bpf_printk("[VMA] Executable file-backed VMA: 0x%lx-0x%lx (ino=%llu, name=%s)", + vma->vm_start, vma->vm_end, ino, vma_name); + + + err = bpf_dynptr_from_file(file, 0, &fdptr); + if (err) { + bpf_printk(" [ELF] Failed to create dynptr for (ino=%llu, name=%s): %d", ino, vma_name, err); + goto next; + } + + err = parse_elf(&fdptr, &s->elf, s); + if (err) + goto next; + + //parse_elf_symbols(&fdptr, &s->elf, s); + + if (task->pid != task->tgid) + goto next; + + int sym_idx = find_sym(&fdptr, &s->elf.dynsym, "tls_shared", STT_TLS, s); + if (sym_idx > 0) { + bpf_printk("FOUND TLS SYM '%s' in .dynsym for '%s': st_value=%llx sz=%llu, shndx=%u\n", + s->sym_name, vma_name, + s->sym.st_value, s->sym.st_size, s->sym.st_shndx); + goto next; + } + sym_idx = find_sym(&fdptr, &s->elf.symtab, "tls_shared", STT_TLS, s); + if (sym_idx > 0) { + bpf_printk("FOUND TLS SYM '%s' in .symtab for '%s': st_value=%llx sz=%llu, shndx=%u\n", + s->sym_name, vma_name, + s->sym.st_value, s->sym.st_size, s->sym.st_shndx); + } + +next: + bpf_dynptr_file_discard(&fdptr); + + 
last_ino = ino; + } + + return 0; } static int task_work_cb(struct bpf_map *map, void *key, void *value) @@ -72,7 +417,7 @@ static int task_work_cb(struct bpf_map *map, void *key, void *value) struct task_struct *task = bpf_get_current_task_btf(); struct task_state *state = value; struct task_event *event = &state->event; - __u32 tid = task->pid; + u32 tid = task->pid; if (event->tid != task->pid) { bpf_printk("MISMATCHED PID %d != expected %d", task->pid, event->tid); @@ -81,8 +426,9 @@ static int task_work_cb(struct bpf_map *map, void *key, void *value) event->ustack_sz = unwind_user_stack(task, event->ustack, MAX_STACK_DEPTH); - bpf_ringbuf_output(&rb, event, sizeof(*event), 0); + enumerate_vmas(task); + bpf_ringbuf_output(&rb, event, sizeof(*event), 0); cleanup: bpf_map_delete_elem(&task_states, key); return 0; @@ -104,7 +450,7 @@ int snoop_tasks(struct bpf_iter__task *ctx) struct task_struct *task = ctx->task; struct task_state *state; struct task_event *event; - __u32 tid; + u32 tid; int err; if (!task) From 3ec985d32add96f2c347b9b956623f3e9718b46b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 5 Dec 2025 12:29:26 -0800 Subject: [PATCH 4/6] snooper: complete general dynamic and local exec TLS mode support Signed-off-by: Andrii Nakryiko --- examples/c/app.c | 2 + examples/c/app_lib.c | 3 + examples/c/snooper.bpf.c | 376 +++++++++++++++++++++++++++++++++------ examples/c/snooper.c | 12 +- examples/c/snooper.h | 2 + 5 files changed, 333 insertions(+), 62 deletions(-) diff --git a/examples/c/app.c b/examples/c/app.c index ba837120..9038cf8b 100644 --- a/examples/c/app.c +++ b/examples/c/app.c @@ -98,7 +98,9 @@ static void *thread_func(void *arg) while (1) { time_t now; + errno = 123456789; func_mux(10); + errno = 987654321; now = time(NULL); if (now > last_print) { diff --git a/examples/c/app_lib.c b/examples/c/app_lib.c index e9a04f39..b7c8aaff 100644 --- a/examples/c/app_lib.c +++ b/examples/c/app_lib.c @@ -1,5 +1,8 @@ __thread int tls_shared; 
+__thread int tls_shared2; static __thread int tls_local_shared; int get_tls_local_shared(void) { return tls_local_shared; } +int get_tls_shared(void) { return tls_shared; } +int get_tls_shared2(void) { return tls_shared2; } void bump_tls_local_shared(void) { tls_local_shared += 16; } diff --git a/examples/c/snooper.bpf.c b/examples/c/snooper.bpf.c index 99877ef9..c781d05e 100644 --- a/examples/c/snooper.bpf.c +++ b/examples/c/snooper.bpf.c @@ -27,8 +27,13 @@ extern int bpf_dynptr_file_discard(struct bpf_dynptr *dynptr) __ksym __weak; #define ELFCLASS64 2 #define EI_CLASS 4 +/* ELF types (e_type) */ +#define ET_EXEC 2 /* Executable file */ +#define ET_DYN 3 /* Shared object file */ + #define SHT_SYMTAB 2 #define SHT_STRTAB 3 +#define SHT_RELA 4 #define SHT_DYNSYM 11 #define STT_NOTYPE 0 @@ -38,7 +43,13 @@ extern int bpf_dynptr_file_discard(struct bpf_dynptr *dynptr) __ksym __weak; #define STT_FILE 4 #define STT_COMMON 5 #define STT_TLS 6 + #define ELF64_ST_TYPE(info) ((info) & 0xf) +#define ELF64_R_SYM(info) ((info) >> 32) +#define ELF64_R_TYPE(info) ((info) & 0xffffffff) + +#define R_X86_64_DTPMOD64 16 +#define R_X86_64_DTPOFF64 17 #define VM_EXEC 0x00000004 @@ -69,11 +80,21 @@ struct elf_symtab { u64 strtab_off; }; +struct elf_relasec { + u32 shndx; + u32 rela_cnt; + u64 rela_off; +}; + struct elf { + u16 type; /* ET_EXEC or ET_DYN */ u64 shoff; /* section headers list offset */ u32 shnum; /* number of sections */ - struct elf_symtab symtab, dynsym; + struct elf_symtab symtab; + + struct elf_symtab dynsym; + struct elf_relasec rela_dyn; /* .rela.dyn section info */ }; struct scratch { @@ -84,11 +105,14 @@ struct scratch { struct elf64_shdr strtab_shdr; struct elf64_sym sym; + struct elf64_rela rela; char sym_name[MAX_SYM_NAME]; }; static int zero = 0; +char tls_var_name[64]; + struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __uint(max_entries, 1); @@ -161,6 +185,7 @@ static int parse_elf(struct bpf_dynptr *fdptr, struct elf *elf, struct scratch * return 
-EOPNOTSUPP; } + elf->type = s->ehdr.e_type; elf->shoff = s->ehdr.e_shoff; elf->shnum = s->ehdr.e_shnum; @@ -170,6 +195,7 @@ static int parse_elf(struct bpf_dynptr *fdptr, struct elf *elf, struct scratch * elf->symtab.shndx = 0; elf->dynsym.shndx = 0; + elf->rela_dyn.shndx = 0; bpf_for(i, 1, elf->shnum) { u64 symtab_off, symtab_size, strtab_shdr_off; @@ -182,39 +208,48 @@ static int parse_elf(struct bpf_dynptr *fdptr, struct elf *elf, struct scratch * break; } - if (s->shdr.sh_type != SHT_SYMTAB && s->shdr.sh_type != SHT_DYNSYM) - continue; - - symtab_off = s->shdr.sh_offset; - symtab_size = s->shdr.sh_size; - symtab_entsize = s->shdr.sh_entsize ?: sizeof(struct elf64_sym); - - /* sh_link points to the associated string table */ - strtab_idx = s->shdr.sh_link; - strtab_shdr_off = elf->shoff + strtab_idx * sizeof(struct elf64_shdr); - err = bpf_dynptr_read(&s->strtab_shdr, sizeof(s->strtab_shdr), fdptr, strtab_shdr_off, 0); - if (err) { - bpf_printk(" [ELF] Failed to read strtab shdr[%d]: %d", strtab_idx, err); - return err; + if (s->shdr.sh_type == SHT_RELA) { + /* Handle .rela.dyn section (SHT_RELA linked to .dynsym) */ + u32 rela_entsize = s->shdr.sh_entsize ?: sizeof(struct elf64_rela); + + if (elf->rela_dyn.shndx == 0) { + /* TODO: validate that shdr.sh_link points to SHT_DYNSYM section */ + elf->rela_dyn.shndx = i; + elf->rela_dyn.rela_off = s->shdr.sh_offset; + elf->rela_dyn.rela_cnt = s->shdr.sh_size / rela_entsize; + } + } else if (s->shdr.sh_type == SHT_SYMTAB || s->shdr.sh_type == SHT_DYNSYM) { + symtab_off = s->shdr.sh_offset; + symtab_size = s->shdr.sh_size; + symtab_entsize = s->shdr.sh_entsize ?: sizeof(struct elf64_sym); + + /* sh_link points to the associated string table */ + strtab_idx = s->shdr.sh_link; + strtab_shdr_off = elf->shoff + strtab_idx * sizeof(struct elf64_shdr); + err = bpf_dynptr_read(&s->strtab_shdr, sizeof(s->strtab_shdr), fdptr, strtab_shdr_off, 0); + if (err) { + bpf_printk(" [ELF] Failed to read strtab shdr[%d]: %d", 
strtab_idx, err); + return err; + } + + //bpf_printk(" [ELF] Found %s: off=%llu, cnt=%llu", + // s->shdr.sh_type == SHT_SYMTAB ? ".symtab" : ".dynsym", + // symtab_off, symtab_size / symtab_entsize); + + if (s->shdr.sh_type == SHT_SYMTAB) { + elf->symtab.shndx = i; + elf->symtab.symtab_off = symtab_off; + elf->symtab.symtab_cnt = symtab_size / symtab_entsize; + elf->symtab.strtab_off = s->strtab_shdr.sh_offset; + } else { + elf->dynsym.shndx = i; + elf->dynsym.symtab_off = symtab_off; + elf->dynsym.symtab_cnt = symtab_size / symtab_entsize; + elf->dynsym.strtab_off = s->strtab_shdr.sh_offset; + } } - //bpf_printk(" [ELF] Found %s: off=%llu, cnt=%llu", - // s->shdr.sh_type == SHT_SYMTAB ? ".symtab" : ".dynsym", - // symtab_off, symtab_size / symtab_entsize); - - if (s->shdr.sh_type == SHT_SYMTAB) { - elf->symtab.shndx = i; - elf->symtab.symtab_off = symtab_off; - elf->symtab.symtab_cnt = symtab_size / symtab_entsize; - elf->symtab.strtab_off = s->strtab_shdr.sh_offset; - } else { - elf->dynsym.shndx = i; - elf->dynsym.symtab_off = symtab_off; - elf->dynsym.symtab_cnt = symtab_size / symtab_entsize; - elf->dynsym.strtab_off = s->strtab_shdr.sh_offset; - } - - if (elf->dynsym.shndx && elf->symtab.shndx) + if (elf->dynsym.shndx && elf->symtab.shndx && elf->rela_dyn.shndx) break; } @@ -235,7 +270,7 @@ static const char *sym_type_str(u8 type) } } -static int find_sym(struct bpf_dynptr *fdptr, struct elf_symtab *symtab, +static int find_symtab_sym(struct bpf_dynptr *fdptr, struct elf_symtab *symtab, const char *sym_name, int sym_type, struct scratch *s) { @@ -276,9 +311,18 @@ static int find_sym(struct bpf_dynptr *fdptr, struct elf_symtab *symtab, return -ENOENT; } -/* - * Iterate symbols from a symbol table and print all symbols. 
- */ +static int find_sym(struct bpf_dynptr *fdptr, struct elf *elf, const char *sym_name, int sym_type, struct scratch *s) +{ + int idx; + + idx = find_symtab_sym(fdptr, &elf->dynsym, sym_name, sym_type, s); + if (idx > 0) + return idx; + + return find_symtab_sym(fdptr, &elf->symtab, sym_name, sym_type, s); +} + +/* Iterate symbols from a symbol table and print all symbols. */ static void print_symtab(struct bpf_dynptr *fdptr, struct elf_symtab *symtab, const char *name, struct scratch *s) { @@ -317,10 +361,7 @@ static void print_symtab(struct bpf_dynptr *fdptr, struct elf_symtab *symtab, } } -/* - * Parse ELF file and print all symbols using bpf_printk. - */ -static void parse_elf_symbols(struct bpf_dynptr *fdptr, struct elf *elf, struct scratch *s) +static void print_symbols(struct bpf_dynptr *fdptr, struct elf *elf, struct scratch *s) { print_symtab(fdptr, &elf->symtab, ".symtab", s); print_symtab(fdptr, &elf->dynsym, ".dynsym", s); @@ -328,11 +369,223 @@ static void parse_elf_symbols(struct bpf_dynptr *fdptr, struct elf *elf, struct int MINUS_ONE = -1; +/* + * On x86_64, TLS is accessed via the FS segment register. + * The FS base points to the Thread Control Block (TCB). + * + * TCB layout (glibc): + * offset 0: void *tcb - self pointer + * offset 8: dtv_t *dtv - Dynamic Thread Vector + * offset 16: void *self - thread descriptor + * ... + * + * DTV layout: + * dtv[0].counter = generation/size + * dtv[1].pointer.val = TLS block for module 1 (main executable) + * dtv[2].pointer.val = TLS block for module 2 (first shared lib) + * ... 
+ * + * For Initial Exec (IE) model (main executable TLS): + * TLS vars are accessed as negative offsets from TP (thread pointer) + * TP = fsbase (on x86_64 with glibc) + * + * For General Dynamic (GD) model (shared library TLS): + * __tls_get_addr() is called with {module_id, offset} + * Returns: dtv[module_id].pointer.val + offset + */ + +/* https://github.com/bminor/glibc/blob/master/sysdeps/generic/dl-dtv.h#L29 */ +typedef union dtv { + size_t counter; + struct dtv_pointer { + void *val; + void *to_free; + } pointer; +} dtv_t; + +/* Partial definition for tcbhead_t + * https://github.com/bminor/glibc/blob/master/sysdeps/x86_64/nptl/tls.h#L42 + */ +typedef struct { + void *tcb; + dtv_t *dtv; +} tcbhead_t; + +struct tls_index { + long mod_id; + long offset; +}; + +/* + * Find the GOT entry offset for a TLS symbol by scanning .rela.dyn for + * R_X86_64_DTPMOD64 relocations matching the symbol index. + * + * At runtime, this GOT entry contains {module_id, tls_offset} which can be + * read from the loaded library's memory. + * + * Returns: GOT virt offset on success, negative error on failure + */ +static long find_tls_got_entry(struct bpf_dynptr *fdptr, struct elf *elf, + u32 sym_idx, struct scratch *s) +{ + int err, i; + + if (!elf->rela_dyn.shndx) + return -ENOENT; + + bpf_for(i, 0, elf->rela_dyn.rela_cnt) { + u64 rela_off = elf->rela_dyn.rela_off + i * sizeof(struct elf64_rela); + u32 rela_sym, rela_type; + + err = bpf_dynptr_read(&s->rela, sizeof(s->rela), fdptr, rela_off, 0); + if (err) + return err; + + rela_type = ELF64_R_TYPE(s->rela.r_info); + if (rela_type != R_X86_64_DTPMOD64) + continue; + + rela_sym = ELF64_R_SYM(s->rela.r_info); + if (sym_idx && rela_sym != sym_idx) + continue; + + /* r_offset is the GOT entry offset */ + return s->rela.r_offset; + } + + return -ENOENT; +} + +/* Read tls_index {module_id, offset} from loaded library memory. 
*/ +static inline int read_got_entry(struct task_struct *task, struct vm_area_struct *vma, const char *vma_name, + struct bpf_dynptr *fdptr, struct scratch *s, long got_off, struct tls_index *tls_index) +{ + /* TODO: this should translate file offset to virtoffset by looking at section header */ + long got_addr = vma->vm_start - vma->vm_pgoff * __PAGE_SIZE + got_off; + + int err = bpf_copy_from_user_task(tls_index, sizeof(*tls_index), (void *)got_addr, task, 0); + if (err) { + bpf_printk("[TLS] Failed to read GOT entry for '%s' at %px: %d", vma_name, got_addr, err); + return -EPROTO; + } + + bpf_printk("[TLS] GOT TLS index for '%s' at %px: module_id=%ld, offset=%ld", + vma_name, got_addr, tls_index->mod_id, tls_index->offset); + + return 0; +} + +/* Figure out absolute address of a TLS variable identified by module ID + offset */ +static long find_tls_addr(struct task_struct *task, long module_id, long offset) +{ + long dtv_ptr, tls_block; + int err; + + long fsbase = task->thread.fsbase; + + /* Read DTV pointer from TCB (offset 8) */ + err = bpf_copy_from_user_task(&dtv_ptr, sizeof(dtv_ptr), + (void *)(fsbase + offsetof(tcbhead_t, dtv)), task, 0); + if (err) { + bpf_printk("[TLS] Failed to read DTV pointer: %d", err); + return err; + } + + //bpf_printk("[TLS] fsbase=%px, dtv=%px", fsbase, dtv_ptr); + + /* + * Read TLS block pointer from DTV[module_id]. + * Each DTV entry is 16 bytes (see dtv_t above). 
+ */ + err = bpf_copy_from_user_task(&tls_block, sizeof(tls_block), + (void *)(dtv_ptr + module_id * sizeof(dtv_t)), task, 0); + if (err) { + bpf_printk("[TLS] Failed to read DTV[%ld]: %d", module_id, err); + return err; + } + + //bpf_printk("[TLS] dtv[%ld].val = %px", module_id, tls_block); + + /* Special value -1 means TLS block not yet allocated */ + if (tls_block == (u64)-1) { + bpf_printk("[TLS] TLS block not allocated for module %ld", module_id); + return -ENOENT; + } + + return tls_block + offset; +} + +static long find_tls_var(struct task_struct *task, struct vm_area_struct *vma, const char *vma_name, + struct bpf_dynptr *fdptr, const char *tls_var_name, struct scratch *s) +{ + struct tls_index tls_index; + int err; + + int sym_idx = find_symtab_sym(fdptr, &s->elf.dynsym, tls_var_name, STT_TLS, s); + if (sym_idx > 0) { + bpf_printk("[TLS] Found TLS symbol '%s' in .dynsym for '%s': st_value=%llx sz=%llu shndx=%u", + s->sym_name, vma_name, + s->sym.st_value, s->sym.st_size, s->sym.st_shndx); + + long got_off = find_tls_got_entry(fdptr, &s->elf, sym_idx, s); + //bpf_printk("[TLS] GOT entry at virt offset 0x%llx", got_off); + if (got_off < 0) { + bpf_printk("[TLS] No GOT entry found for symbol #%d: %ld", sym_idx, got_off); + return -EPROTO; + } + + /* Read tls_index {module_id, offset} from loaded library memory. 
*/ + err = read_got_entry(task, vma, vma_name, fdptr, s, got_off, &tls_index); + if (err) { + bpf_printk("[TLS] Failed reading GOT entry symbol #%d at %px: %d", + sym_idx, got_off, err); + return err; + } + + return find_tls_addr(task, tls_index.mod_id, tls_index.offset); + } + + /* local TLS variable not in .dynsym */ + sym_idx = find_symtab_sym(fdptr, &s->elf.symtab, tls_var_name, STT_TLS, s); + if (sym_idx > 0) { + bpf_printk("[TLS] Found TLS symbol '%s' in .symtab for '%s': st_value=%llx sz=%llu shndx=%u", + s->sym_name, vma_name, + s->sym.st_value, s->sym.st_size, s->sym.st_shndx); + + if (s->elf.type == ET_EXEC) { + /* for local exec TLS model, module ID is 1 */ + return find_tls_addr(task, 1, s->sym.st_value); + } else { + /* + * For local symbol in shared lib, try to find module ID using *ANY* + * DTPMOD64 relo, and then assume that st_value gives us valid offset + * within module's block. + */ + long got_off = find_tls_got_entry(fdptr, &s->elf, 0, s); + if (got_off < 0) { + bpf_printk("[TLS] No GOT entry (any at all) found in '%s': %ld", vma_name, got_off); + return -EOPNOTSUPP; + } + + /* Read tls_index {module_id, offset} from loaded library memory. */ + err = read_got_entry(task, vma, vma_name, fdptr, s, got_off, &tls_index); + if (err) + return err; + + return find_tls_addr(task, tls_index.mod_id, s->sym.st_value); + } + + return -EOPNOTSUPP; + } + + return -ENOENT; +} + /* * Iterate VMAs of the current task, find executable file-backed VMAs, * and parse their ELF symbols.
*/ -static int enumerate_vmas(struct task_struct *task) +static int enumerate_vmas(struct task_struct *task, struct task_event *event) { struct vm_area_struct *vma; struct scratch *s; @@ -343,7 +596,7 @@ static int enumerate_vmas(struct task_struct *task) if (!s) return 0; /* can't happen */ - bpf_printk("[VMA] Enumerating VMAs for task %d (%s)", task->pid, task->comm); + //bpf_printk("[VMA] Enumerating VMAs for task %d (%s)", task->pid, task->comm); bpf_for_each(task_vma, vma, task, 0) { struct bpf_dynptr fdptr; @@ -370,9 +623,8 @@ static int enumerate_vmas(struct task_struct *task) continue; const char *vma_name = (const char *)file->f_path.dentry->d_name.name; - bpf_printk("[VMA] Executable file-backed VMA: 0x%lx-0x%lx (ino=%llu, name=%s)", - vma->vm_start, vma->vm_end, ino, vma_name); - + //bpf_printk("[VMA] Executable file-backed VMA: 0x%lx-0x%lx (ino=%llu, name=%s)", + // vma->vm_start, vma->vm_end, ino, vma_name); err = bpf_dynptr_from_file(file, 0, &fdptr); if (err) { @@ -384,25 +636,30 @@ static int enumerate_vmas(struct task_struct *task) if (err) goto next; - //parse_elf_symbols(&fdptr, &s->elf, s); + //print_symbols(&fdptr, &s->elf, s); - if (task->pid != task->tgid) + long tls_addr = find_tls_var(task, vma, vma_name, &fdptr, tls_var_name, s); + if (tls_addr == -ENOENT) goto next; - - int sym_idx = find_sym(&fdptr, &s->elf.dynsym, "tls_shared", STT_TLS, s); - if (sym_idx > 0) { - bpf_printk("FOUND TLS SYM '%s' in .dynsym for '%s': st_value=%llx sz=%llu, shndx=%u\n", - s->sym_name, vma_name, - s->sym.st_value, s->sym.st_size, s->sym.st_shndx); + if (tls_addr < 0) { + bpf_printk("[TLS] Failed to figure TLS address of '%s' variable: %ld", tls_var_name, tls_addr); goto next; } - sym_idx = find_sym(&fdptr, &s->elf.symtab, "tls_shared", STT_TLS, s); - if (sym_idx > 0) { - bpf_printk("FOUND TLS SYM '%s' in .symtab for '%s': st_value=%llx sz=%llu, shndx=%u\n", - s->sym_name, vma_name, - s->sym.st_value, s->sym.st_size, s->sym.st_shndx); + + /* Read the actual 
TLS variable */ + int val; + err = bpf_copy_from_user_task(&val, sizeof(val), (void *)tls_addr, task, 0); + if (err) { + bpf_printk("[TLS] Failed to read TLS var at %px: %d", tls_addr, err); + goto next; } + bpf_printk("[TLS] TLS variable '%s' found in '%s' (TID %d '%s') = %d", + tls_var_name, vma_name, task->pid, task->comm, val); + + event->has_tls = true; + event->tls_value = val; + next: bpf_dynptr_file_discard(&fdptr); @@ -424,9 +681,10 @@ static int task_work_cb(struct bpf_map *map, void *key, void *value) goto cleanup; } + event->has_tls = false; event->ustack_sz = unwind_user_stack(task, event->ustack, MAX_STACK_DEPTH); - enumerate_vmas(task); + enumerate_vmas(task, event); bpf_ringbuf_output(&rb, event, sizeof(*event), 0); cleanup: diff --git a/examples/c/snooper.c b/examples/c/snooper.c index 1918a08c..1ea5cd4d 100644 --- a/examples/c/snooper.c +++ b/examples/c/snooper.c @@ -17,6 +17,7 @@ static struct blaze_symbolizer *symbolizer; static volatile bool exiting = false; +struct snooper_bpf *skel; static void sig_handler(int sig) { @@ -103,6 +104,9 @@ static int handle_event(void *ctx, void *data, size_t size) printf("Task: %s (PID=%d, TID=%d)\n", event->comm, event->pid, event->tid); + if (event->has_tls) + printf(" TLS: %s = %d\n", skel->bss->tls_var_name, (int)event->tls_value); + /* Show kernel stack trace */ if (event->kstack_sz > 0) { printf(" Kernel stack:\n"); @@ -129,14 +133,13 @@ static int handle_event(void *ctx, void *data, size_t size) static void show_help(const char *progname) { - printf("Usage: %s \n", progname); + printf("Usage: %s \n", progname); printf(" PID Process ID to filter tasks (required)\n"); } int main(int argc, char **argv) { struct ring_buffer *rb = NULL; - struct snooper_bpf *skel = NULL; LIBBPF_OPTS(bpf_iter_attach_opts, opts); union bpf_iter_link_info linfo; pid_t pid_filter = 0; @@ -144,7 +147,7 @@ int main(int argc, char **argv) int err = 0; char dummy; - if (argc < 2) { + if (argc < 3) { show_help(argv[0]); return 1; } @@ 
-168,6 +171,9 @@ int main(int argc, char **argv) goto cleanup; } + snprintf(skel->bss->tls_var_name, sizeof(skel->bss->tls_var_name), + "%s", argv[2]); + symbolizer = blaze_symbolizer_new(); if (!symbolizer) { fprintf(stderr, "Failed to create symbolizer\n"); diff --git a/examples/c/snooper.h b/examples/c/snooper.h index f378149f..ea5d13a6 100644 --- a/examples/c/snooper.h +++ b/examples/c/snooper.h @@ -21,6 +21,8 @@ struct task_event { __s32 ustack_sz; stack_trace_t kstack; stack_trace_t ustack; + bool has_tls; + long tls_value; }; #endif /* __SNOOPER_H_ */ From 36456f9d1b044bf4a345451cb11d528ba56ac5bd Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 5 Dec 2025 15:37:01 -0800 Subject: [PATCH 5/6] snooper: find PyVersion and report it Signed-off-by: Andrii Nakryiko --- examples/c/snooper.bpf.c | 22 ++++++++++++++++++++++ examples/c/snooper.c | 7 +++++++ examples/c/snooper.h | 1 + 3 files changed, 30 insertions(+) diff --git a/examples/c/snooper.bpf.c b/examples/c/snooper.bpf.c index c781d05e..f7f8b6f6 100644 --- a/examples/c/snooper.bpf.c +++ b/examples/c/snooper.bpf.c @@ -636,6 +636,27 @@ static int enumerate_vmas(struct task_struct *task, struct task_event *event) if (err) goto next; + if (task->pid == task->tgid) { + int sym_idx = find_sym(&fdptr, &s->elf, "Py_Version", STT_OBJECT, s); + if (sym_idx > 0) { + long py_ver_addr = vma->vm_start - vma->vm_pgoff * __PAGE_SIZE + s->sym.st_value; + bpf_printk("[PY] Found 'Py_Version' global variable for PID %d (%s) in '%s' at %px", + task->pid, task->comm, vma_name, py_ver_addr); + + __u32 py_ver; + err = bpf_copy_from_user_task(&py_ver, sizeof(py_ver), (void *)py_ver_addr, task, 0); + if (err) { + bpf_printk("[PY] Failed to read Py_Version at %px for '%s': %d", + py_ver_addr, vma_name, err); + } else { + bpf_printk("[PY] PID %d (%s) is running Python v%u.%u.%u!", + task->pid, task->comm, + (u8)(py_ver >> 24), (u8)(py_ver >> 16), (u8)(py_ver >> 8), py_ver); + event->py_ver = py_ver; + } + } + } + 
//print_symbols(&fdptr, &s->elf, s); long tls_addr = find_tls_var(task, vma, vma_name, &fdptr, tls_var_name, s); @@ -681,6 +702,7 @@ static int task_work_cb(struct bpf_map *map, void *key, void *value) goto cleanup; } + event->py_ver = 0; event->has_tls = false; event->ustack_sz = unwind_user_stack(task, event->ustack, MAX_STACK_DEPTH); diff --git a/examples/c/snooper.c b/examples/c/snooper.c index 1ea5cd4d..060cc725 100644 --- a/examples/c/snooper.c +++ b/examples/c/snooper.c @@ -104,6 +104,13 @@ static int handle_event(void *ctx, void *data, size_t size) printf("Task: %s (PID=%d, TID=%d)\n", event->comm, event->pid, event->tid); + if (event->py_ver) { + printf(" Running Python v%u.%u.%u!\n", + (__u8)(event->py_ver >> 24), + (__u8)(event->py_ver >> 16), + (__u8)(event->py_ver >> 8)); + } + if (event->has_tls) printf(" TLS: %s = %d\n", skel->bss->tls_var_name, (int)event->tls_value); diff --git a/examples/c/snooper.h b/examples/c/snooper.h index ea5d13a6..93a34b8f 100644 --- a/examples/c/snooper.h +++ b/examples/c/snooper.h @@ -23,6 +23,7 @@ struct task_event { stack_trace_t ustack; bool has_tls; long tls_value; + __u32 py_ver; }; #endif /* __SNOOPER_H_ */ From 96a5078b3d956d6f27a5e39b1cab800a12995bc6 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 9 Dec 2025 15:12:49 -0800 Subject: [PATCH 6/6] snooper: fix up CMakeLists.txt to fix CI build Signed-off-by: Andrii Nakryiko --- examples/c/CMakeLists.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/examples/c/CMakeLists.txt b/examples/c/CMakeLists.txt index a923e749..78eb7f09 100644 --- a/examples/c/CMakeLists.txt +++ b/examples/c/CMakeLists.txt @@ -77,6 +77,7 @@ find_package(BpfObject REQUIRED) file(GLOB apps *.bpf.c) if(NOT CARGO_EXISTS) list(REMOVE_ITEM apps ${CMAKE_CURRENT_SOURCE_DIR}/profile.bpf.c) + list(REMOVE_ITEM apps ${CMAKE_CURRENT_SOURCE_DIR}/snooper.bpf.c) endif() foreach(app ${apps}) get_filename_component(app_stem ${app} NAME_WE) @@ -93,4 +94,10 @@ foreach(app ${apps}) 
target_link_libraries(${app_stem} ${CMAKE_CURRENT_SOURCE_DIR}/../../blazesym/target/release/libblazesym_c.a -lpthread -lrt -ldl) endif() + if(${app_stem} STREQUAL snooper) + target_include_directories(${app_stem} PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../../blazesym/capi/include) + target_link_libraries(${app_stem} + ${CMAKE_CURRENT_SOURCE_DIR}/../../blazesym/target/release/libblazesym_c.a -lpthread -lrt -ldl) + endif() endforeach()