Skip to content

Commit c3ed13f

Browse files
committed
Show multiple user PF's in the output
1 parent 3394480 commit c3ed13f

3 files changed

Lines changed: 128 additions & 42 deletions

File tree

sigsegv-monitor.bpf.c

Lines changed: 100 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,6 @@
44
#include <bpf/bpf_tracing.h>
55
#include "sigsegv-monitor.h"
66

7-
// By default is commented: a lot of #PF events are hit
8-
// so enable only if it is acceptable.
9-
// #define TRACE_PF_CR2
10-
117
// if /sys/kernel/tracing/trace_on is set to 1,
128
// cat /sys/kernel/tracing/trace
139
// will show the bpf_printk() output
@@ -21,12 +17,60 @@ struct trace_event_raw_page_fault_user {
2117
char __data[0];
2218
};
2319

20+
struct cr2_stat {
21+
u64 cr2;
22+
u64 err;
23+
u64 tai;
24+
};
25+
26+
struct cr2_stats {
27+
struct cr2_stat stat[MAX_USER_PF_ENTRIES];
28+
u64 head;
29+
u64 count;
30+
};
31+
2432
struct {
2533
__uint(type, BPF_MAP_TYPE_HASH);
2634
__uint(max_entries, 1024);
2735
__type(key, u32);
28-
__type(value, u64);
36+
__type(value, struct cr2_stats);
2937
} tgid_cr2 SEC(".maps");
38+
39+
// Put a ring buffer into the empty state.
//
// Only the bookkeeping fields are reset; the stat[] slots are left untouched,
// so a caller that copies the whole structure elsewhere must clear the slots
// itself.
//
// `static inline` rather than plain `inline`: a non-static inline definition
// provides no external definition in C99/C11, so any call the compiler
// decides not to inline would be an unresolved symbol.
static inline void cr2stats_init(struct cr2_stats *stats) {
    stats->head = 0;
    stats->count = 0;
}
43+
44+
inline void cr2stats_push(struct cr2_stats* stats, struct cr2_stat* value) {
45+
if (stats->head < MAX_USER_PF_ENTRIES) {
46+
stats->stat[stats->head] = *value;
47+
48+
if (++stats->head == MAX_USER_PF_ENTRIES) {
49+
stats->head = 0;
50+
}
51+
52+
if (stats->count < MAX_USER_PF_ENTRIES) {
53+
++stats->count;
54+
}
55+
}
56+
}
57+
58+
// The `index` parameter here is not an index in the array, but an index in the ring buffer,
59+
// i.e. passing an index 0 would return the oldest element in the ring buffer.
60+
inline struct cr2_stat* cr2stats_get(struct cr2_stats* stats, u32 index) {
61+
if (stats->count == MAX_USER_PF_ENTRIES) {
62+
index += stats->head;
63+
if (index >= MAX_USER_PF_ENTRIES) {
64+
index -= MAX_USER_PF_ENTRIES;
65+
}
66+
}
67+
68+
if (index < MAX_USER_PF_ENTRIES) {
69+
return stats->stat + index;
70+
}
71+
72+
return NULL;
73+
}
3074
#endif
3175

3276
// Output map (for user space)
@@ -75,24 +119,24 @@ int trace_sigsegv(struct trace_event_raw_signal_generate *ctx) {
75119
bpf_probe_read_kernel_str(&event->tgleader_comm, sizeof(event->tgleader_comm), &task->group_leader->comm);
76120
// TODO: can the acquisition of pidns_tgid, pidns_pid be made more robust / simplified?
77121
{
78-
struct pid const* thread_pid = task->thread_pid;
79-
unsigned int const level = thread_pid->level;
80-
// thread_pid->numbers is a size-one flexible array member (type numbers[1])
81-
// => cannot perform bounds-check against BTF information
82-
// => need bpf_probe_read_kernel to read from indices potentially > 1
83-
struct upid const* upid_inv = &thread_pid->numbers[level];
84-
event->pidns_pid = BPF_CORE_READ(upid_inv, nr); // we already have implicit CO-RE, but we need the probe function call
85-
}
86-
{
87-
struct pid const* tgid_pid = task->signal->pids[PIDTYPE_TGID];
88-
unsigned int const level = tgid_pid->level;
89-
struct upid const* tgid_upid_inv = &tgid_pid->numbers[level];
90-
// TODO: doesn't this return the pid in the NS of the tg leader, instead of the pid in the NS of the current thread?
91-
// TODO: don't we need RCU here?
92-
event->pidns_tgid = BPF_CORE_READ(tgid_upid_inv, nr);
93-
}
94-
95-
event->regs.trapno = task->thread.trap_nr; // TODO: also copy the other fields like cr2 and error_code
122+
struct pid const* thread_pid = task->thread_pid;
123+
unsigned int const level = thread_pid->level;
124+
// thread_pid->numbers is a size-one flexible array member (type numbers[1])
125+
// => cannot perform bounds-check against BTF information
126+
// => need bpf_probe_read_kernel to read from indices potentially > 1
127+
struct upid const* upid_inv = &thread_pid->numbers[level];
128+
event->pidns_pid = BPF_CORE_READ(upid_inv, nr); // we already have implicit CO-RE, but we need the probe function call
129+
}
130+
{
131+
struct pid const* tgid_pid = task->signal->pids[PIDTYPE_TGID];
132+
unsigned int const level = tgid_pid->level;
133+
struct upid const* tgid_upid_inv = &tgid_pid->numbers[level];
134+
// TODO: doesn't this return the pid in the NS of the tg leader, instead of the pid in the NS of the current thread?
135+
// TODO: don't we need RCU here?
136+
event->pidns_tgid = BPF_CORE_READ(tgid_upid_inv, nr);
137+
}
138+
139+
event->regs.trapno = task->thread.trap_nr;
96140
event->regs.err = task->thread.error_code;
97141

98142
// TODO: how are these regs acquired?
@@ -119,18 +163,28 @@ int trace_sigsegv(struct trace_event_raw_signal_generate *ctx) {
119163
event->regs.flags = regs->flags;
120164

121165
event->regs.cr2 = task->thread.cr2;
122-
event->regs.cr2_fault = -1;
123-
124-
#ifdef TRACE_PF_CR2
125-
u32 tgid = task->tgid;
126-
u64 *cr2 = bpf_map_lookup_elem(&tgid_cr2, &tgid);
166+
}
127167

128-
if (cr2) {
129-
event->regs.cr2_fault = *cr2;
130-
bpf_map_delete_elem(&tgid_cr2, &tgid);
168+
event->pf_count = 0;
169+
#ifdef TRACE_PF_CR2
170+
u32 tgid = task->tgid;
171+
struct cr2_stats *cr2stats = bpf_map_lookup_elem(&tgid_cr2, &tgid);
172+
173+
if (cr2stats) {
174+
for (u32 i = 0; i < cr2stats->count && i < MAX_USER_PF_ENTRIES; i++) {
175+
struct cr2_stat* stat = cr2stats_get(cr2stats, i);
176+
if (stat) {
177+
event->pf[i].cr2 = stat->cr2;
178+
event->pf[i].err = stat->err;
179+
event->pf[i].tai = stat->tai;
180+
181+
++event->pf_count;
182+
}
131183
}
132-
#endif
184+
185+
bpf_map_delete_elem(&tgid_cr2, &tgid);
133186
}
187+
#endif
134188

135189
// TODO: when is this snapshot taken? or does the CPU not do LBR in the kernel?
136190
long ret = bpf_get_branch_snapshot(&event->lbr, sizeof(event->lbr), 0);
@@ -149,13 +203,24 @@ int trace_sigsegv(struct trace_event_raw_signal_generate *ctx) {
149203
#ifdef TRACE_PF_CR2
150204
// Tracepoint handler for user-space page faults.
//
// Records the faulting address, the error code and a TAI timestamp in the
// per-tgid ring buffer stored in the tgid_cr2 map, creating the map entry on
// the first fault seen for a thread group. Always returns 0.
SEC("tracepoint/exceptions/page_fault_user")
int trace_page_fault(struct trace_event_raw_page_fault_user *ctx) {
    struct cr2_stat stat;
    u32 tgid;

    stat.cr2 = ctx->address;
    stat.err = ctx->error_code;
    stat.tai = bpf_ktime_get_tai_ns();
    // Upper 32 bits of the helper's return value hold the tgid.
    tgid = bpf_get_current_pid_tgid() >> 32;

    struct cr2_stats *cr2stats = bpf_map_lookup_elem(&tgid_cr2, &tgid);
    if (cr2stats) {
        // Entry already exists: update it in place.
        cr2stats_push(cr2stats, &stat);
    } else {
        struct cr2_stats new_stats;

        // Zero the whole object first: bpf_map_update_elem() copies every
        // byte of the value, and only one slot is written below. Without
        // this, uninitialized stack contents would be stored in the map
        // (and passing partly uninitialized stack memory to a helper may be
        // rejected by the verifier — NOTE(review): depends on kernel version
        // and privileges).
        __builtin_memset(&new_stats, 0, sizeof(new_stats));
        cr2stats_init(&new_stats);
        cr2stats_push(&new_stats, &stat);

        // Best effort: a failed insert (e.g. map full) only loses this
        // record.
        bpf_map_update_elem(&tgid_cr2, &tgid, &new_stats, BPF_ANY);
    }

    return 0;
}

sigsegv-monitor.c

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -93,13 +93,22 @@ void handle_event(void *ctx, int cpu, void *data, __u32 data_sz) {
9393
printf("\"flags\":\"0x%016llx\",", e->regs.flags);
9494
printf("\"trapno\":\"0x%016llx\",", e->regs.trapno);
9595
printf("\"err\":\"0x%016llx\",", e->regs.err);
96-
printf("\"cr2\":\"0x%016llx\",", e->regs.cr2);
97-
if (e->regs.cr2_fault != (u64)-1)
98-
printf("\"cr2_fault\":\"0x%016llx\"", e->regs.cr2_fault);
99-
else
100-
printf("\"cr2_fault\":null");
96+
printf("\"cr2\":\"0x%016llx\"", e->regs.cr2);
10197
printf("},");
10298

99+
#ifdef TRACE_PF_CR2
100+
printf("\"page_faults\": [");
101+
for_each(i, e->pf_count)
102+
{
103+
printf("{\"cr2\":\"0x%016llx\",\"err\":\"0x%016llx\",\"tai\":%llu}", e->pf[i].cr2, e->pf[i].err, e->pf[i].tai);
104+
105+
if (i + 1 != e->pf_count) {
106+
printf(",");
107+
}
108+
}
109+
printf("],");
110+
#endif
111+
103112
printf("\"lbr\":[");
104113
int lbr_limit = (e->lbr_count < MAX_LBR_ENTRIES) ? e->lbr_count : MAX_LBR_ENTRIES;
105114
for_each(i, lbr_limit) {

sigsegv-monitor.h

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,17 @@
11
#pragma once
22

3-
43
#define MAX_LBR_ENTRIES 32
4+
#define MAX_USER_PF_ENTRIES 16
5+
6+
// Commented out by default: a lot of #PF events are hit,
7+
// so enable only if it is acceptable.
8+
// #define TRACE_PF_CR2
9+
10+
struct page_fault_info_t {
11+
u64 cr2;
12+
u64 err;
13+
u64 tai;
14+
};
515

616
struct user_regs_t {
717
u64 rip;
@@ -25,7 +35,6 @@ struct user_regs_t {
2535
u64 trapno;
2636
u64 err;
2737
u64 cr2;
28-
u64 cr2_fault;
2938
};
3039

3140
// WARNING: this is for the SENDING process (e.g. pid) of the signal!
@@ -45,4 +54,7 @@ struct event_t {
4554
struct perf_branch_entry lbr[MAX_LBR_ENTRIES];
4655

4756
u64 tai; // time atomic international
57+
58+
u32 pf_count;
59+
struct page_fault_info_t pf[MAX_USER_PF_ENTRIES];
4860
};

0 commit comments

Comments
 (0)