-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathkern_error_monitor.c
More file actions
362 lines (302 loc) · 9.25 KB
/
kern_error_monitor.c
File metadata and controls
362 lines (302 loc) · 9.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
// SPDX-License-Identifier: GPL-2.0
/*
* kern_error_monitor.c - Userspace daemon that loads the BPF program
* and prints KERN_ERR messages as they occur.
*
* Hybrid approach:
* - BPF kprobe on vprintk_emit captures PID, process name, and device
* info at function entry (before the message is formatted).
* - /dev/kmsg provides the fully formatted message text after the
* kernel stores it.
* - Events are correlated by timestamp: BPF events always arrive
* before the corresponding /dev/kmsg entry, so a simple FIFO
* queue with timestamp sanity check works for matching.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <poll.h>
#include <bpf/libbpf.h>
#include "kern_error_monitor.h"
#include "kern_error_monitor.skel.h"
/* Maximum BPF events to cache for correlation */
#define BPF_CACHE_SIZE 64
/*
* Maximum timestamp difference (ns) for BPF/kmsg correlation.
* The BPF kprobe fires at vprintk_emit entry; the message is stored
* in the kernel log buffer within the same function call, so the gap
* is typically sub-microsecond. 50ms gives ample headroom.
*/
#define TS_MATCH_WINDOW_NS (50ULL * 1000 * 1000)
/* /dev/kmsg read buffer — kernel records are at most ~8 KiB */
#define KMSG_BUF_SIZE 8192
/* Set to 1 by the signal handler; polled by the main loop for clean exit. */
static volatile sig_atomic_t exiting;

/*
 * SIGINT/SIGTERM handler: request shutdown of the main loop.
 * Only touches a volatile sig_atomic_t, as required for async-signal
 * safety.
 */
static void sig_handler(int sig)
{
exiting = 1;
}
/* ------------------------------------------------------------------ */
/* BPF event FIFO cache */
/* ------------------------------------------------------------------ */

/*
 * One BPF kprobe event, cached until the matching /dev/kmsg record
 * arrives (or until it is evicted or discarded as stale).
 * Field layout mirrors `struct event` from kern_error_monitor.h.
 */
struct bpf_cached_event {
unsigned int pid;                    /* PID of the task that emitted the message */
unsigned int is_dev_err;             /* non-zero when device info was captured */
unsigned long long ts_ns;            /* BPF-side timestamp, ns — compared against kmsg ts */
char comm[TASK_COMM_LEN];            /* task name */
char dev_subsystem[MAX_SUBSYS_LEN];  /* device subsystem, if is_dev_err */
char dev_device[MAX_DEV_LEN];        /* device name, if is_dev_err */
};

/* Fixed-size ring of events awaiting correlation (oldest at cache_head). */
static struct bpf_cached_event bpf_cache[BPF_CACHE_SIZE];
static int cache_head; /* next slot to read (oldest entry) */
static int cache_tail; /* next slot to write */
static int cache_count; /* number of valid entries in the ring */
/*
 * Append a BPF event to the FIFO cache.  When the ring is full the
 * oldest entry is evicted to make room — losing an old, unmatched
 * event is preferable to dropping a fresh one.
 */
static void cache_push(const struct event *e)
{
	struct bpf_cached_event *slot;

	if (cache_count == BPF_CACHE_SIZE) {
		/* Ring full: evict the oldest entry */
		cache_head = (cache_head + 1) % BPF_CACHE_SIZE;
		cache_count--;
	}

	slot = &bpf_cache[cache_tail];
	slot->pid = e->pid;
	slot->is_dev_err = e->is_dev_err;
	slot->ts_ns = e->ts_ns;
	memcpy(slot->comm, e->comm, sizeof(slot->comm));
	memcpy(slot->dev_subsystem, e->dev_subsystem,
	       sizeof(slot->dev_subsystem));
	memcpy(slot->dev_device, e->dev_device, sizeof(slot->dev_device));

	cache_tail = (cache_tail + 1) % BPF_CACHE_SIZE;
	cache_count++;
}
/*
 * Try to dequeue a cached BPF event whose timestamp is at most
 * TS_MATCH_WINDOW_NS before the given /dev/kmsg timestamp.
 *
 * The cache is a FIFO in timestamp order, so only the head needs
 * inspection: a head older than the window is discarded (its /dev/kmsg
 * counterpart was either not KERN_ERR or was missed), and a head newer
 * than the kmsg record cannot match yet.
 *
 * Fix vs. previous version: an entry aged exactly TS_MATCH_WINDOW_NS
 * satisfied neither the match test (strict <) nor the stale test
 * (strict >), so it lingered at the head unmatched; popping first and
 * then classifying closes that gap and also avoids the (theoretical)
 * unsigned wrap in `c->ts_ns + TS_MATCH_WINDOW_NS`.
 *
 * Returns 1 if matched (fills *out), 0 otherwise.
 */
static int cache_match(unsigned long long kmsg_ts_ns,
		       struct bpf_cached_event *out)
{
	while (cache_count > 0) {
		struct bpf_cached_event *c = &bpf_cache[cache_head];

		/*
		 * The kprobe fires before the message is stored, so a
		 * BPF timestamp after the kmsg timestamp belongs to a
		 * later record — keep it, no match for this one.
		 */
		if (c->ts_ns > kmsg_ts_ns)
			break;

		/* Pop the head: it is either the match or stale. */
		cache_head = (cache_head + 1) % BPF_CACHE_SIZE;
		cache_count--;

		if (kmsg_ts_ns - c->ts_ns < TS_MATCH_WINDOW_NS) {
			*out = *c;
			return 1;
		}
		/* >= window old: stale, discarded; try the next entry. */
	}
	return 0;
}
/* ------------------------------------------------------------------ */
/* BPF ring buffer callback */
/* ------------------------------------------------------------------ */

/*
 * Ring buffer callback: stash the incoming BPF event in the FIFO
 * cache for later correlation with /dev/kmsg.  ctx and data_sz are
 * unused (the event size is fixed).
 */
static int handle_event(void *ctx, void *data, size_t data_sz)
{
	(void)ctx;
	(void)data_sz;

	cache_push(data);
	return 0;
}
/* ------------------------------------------------------------------ */
/* /dev/kmsg parser */
/* ------------------------------------------------------------------ */

/*
 * Parse one /dev/kmsg record.
 * Format: "priority,sequence,timestamp_us,flags;message\n"
 *
 * The syslog level is the low 3 bits of the priority field (the upper
 * bits are the facility).
 *
 * @buf:    NUL-terminated record as read from /dev/kmsg
 * @level:  out — syslog level (0-7)
 * @ts_us:  out — timestamp, microseconds since boot
 * @msg:    out — message text, trailing newline stripped, truncated
 *          to fit msg_sz
 * @msg_sz: capacity of @msg; must be > 0
 *
 * Returns 0 on success, -1 on parse error (or msg_sz == 0).
 */
static int parse_kmsg(const char *buf, int *level,
		      unsigned long long *ts_us, char *msg, size_t msg_sz)
{
	unsigned long long priority;
	const char *p, *semi;
	size_t len;

	/* Guard: msg_sz - 1 below would underflow for a zero-size buffer */
	if (msg_sz == 0)
		return -1;

	/* priority (facility | level) */
	priority = strtoull(buf, (char **)&p, 10);
	if (*p != ',')
		return -1;
	p++;

	/* sequence — skip */
	(void)strtoull(p, (char **)&p, 10);
	if (*p != ',')
		return -1;
	p++;

	/* timestamp (microseconds since boot) */
	*ts_us = strtoull(p, (char **)&p, 10);
	if (*p != ',' && *p != ';')
		return -1;

	/* message text starts after the first ';' */
	semi = strchr(p, ';');
	if (!semi)
		return -1;
	semi++;

	*level = priority & 7;

	/*
	 * Copy the message, stripping a trailing newline first.
	 * memcpy with an explicit terminator replaces the previous
	 * strncpy, which does not guarantee NUL-termination and
	 * zero-pads the remainder of the buffer needlessly.
	 */
	len = strlen(semi);
	if (len > 0 && semi[len - 1] == '\n')
		len--;
	if (len >= msg_sz)
		len = msg_sz - 1;
	memcpy(msg, semi, len);
	msg[len] = '\0';
	return 0;
}
/* ------------------------------------------------------------------ */
/* libbpf logging */
/* ------------------------------------------------------------------ */

/*
 * libbpf log callback: suppress debug-level chatter, forward warnings
 * and errors to stderr.
 */
static int libbpf_print_fn(enum libbpf_print_level level, const char *format,
			   va_list args)
{
	return (level == LIBBPF_DEBUG) ? 0 : vfprintf(stderr, format, args);
}
/* ------------------------------------------------------------------ */
/* main */
/* ------------------------------------------------------------------ */

/*
 * Entry point.  Loads and attaches the BPF skeleton, tails /dev/kmsg,
 * and prints every KERN_ERR message, annotated with pid/comm when a
 * cached BPF event correlates by timestamp.
 *
 * Returns 0 on clean shutdown (SIGINT/SIGTERM), 1 on any setup or
 * runtime failure.  Requires privileges to load BPF programs and
 * read /dev/kmsg.
 */
int main(int argc, char **argv)
{
	struct kern_error_monitor_bpf *skel;
	struct ring_buffer *rb = NULL;
	struct pollfd pfd;
	int kmsg_fd = -1;
	char kmsg_buf[KMSG_BUF_SIZE];
	int err;	/* first assigned by __load(); 0 on the success path */

	libbpf_set_print(libbpf_print_fn);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);

	/* Open BPF application */
	skel = kern_error_monitor_bpf__open();
	if (!skel) {
		fprintf(stderr, "Failed to open BPF skeleton\n");
		return 1;
	}

	/* Load and verify BPF program */
	err = kern_error_monitor_bpf__load(skel);
	if (err) {
		fprintf(stderr, "Failed to load BPF skeleton: %d\n", err);
		goto cleanup;
	}

	/* Attach kprobe to vprintk_emit */
	err = kern_error_monitor_bpf__attach(skel);
	if (err) {
		fprintf(stderr, "Failed to attach BPF skeleton: %d\n", err);
		goto cleanup;
	}

	/* Set up ring buffer — we call ring_buffer__consume() manually */
	rb = ring_buffer__new(bpf_map__fd(skel->maps.events), handle_event,
			      NULL, NULL);
	if (!rb) {
		err = -1;
		fprintf(stderr, "Failed to create ring buffer\n");
		goto cleanup;
	}

	/* Open /dev/kmsg for fully formatted kernel messages */
	kmsg_fd = open("/dev/kmsg", O_RDONLY | O_NONBLOCK);
	if (kmsg_fd < 0) {
		fprintf(stderr, "Failed to open /dev/kmsg: %s\n",
			strerror(errno));
		err = -1;
		goto cleanup;
	}

	/* Seek to end — only process messages generated after startup */
	if (lseek(kmsg_fd, 0, SEEK_END) < 0) {
		fprintf(stderr, "Failed to seek /dev/kmsg: %s\n",
			strerror(errno));
		err = -1;
		goto cleanup;
	}

	printf("Monitoring kernel error messages (KERN_ERR)...\n");
	printf("Press Ctrl-C to exit.\n\n");

	pfd.fd = kmsg_fd;
	pfd.events = POLLIN;

	while (!exiting) {
		int ret;

		/* Drain any pending BPF events into cache */
		ring_buffer__consume(rb);

		/*
		 * Wait for /dev/kmsg data (or timeout for signal check).
		 * The 100ms timeout bounds how long it takes to notice
		 * `exiting` after a signal.
		 */
		ret = poll(&pfd, 1, 100);
		if (ret < 0) {
			if (errno == EINTR)
				continue;
			fprintf(stderr, "poll error: %s\n", strerror(errno));
			err = -1;
			break;
		}
		if (ret == 0)
			continue;

		/* Drain BPF ring buffer again — events arriving between
		 * the first consume and the poll wakeup are captured. */
		ring_buffer__consume(rb);

		/* Read all available /dev/kmsg entries — each read()
		 * returns one record. */
		while (!exiting) {
			ssize_t n = read(kmsg_fd, kmsg_buf,
					 sizeof(kmsg_buf) - 1);
			if (n < 0) {
				if (errno == EAGAIN || errno == EWOULDBLOCK)
					break;	/* drained; back to poll() */
				if (errno == EPIPE) {
					/* Kernel ring buffer overrun — seek
					 * forward to current head */
					lseek(kmsg_fd, 0, SEEK_END);
					break;
				}
				if (errno == EINTR)
					continue;
				fprintf(stderr, "read /dev/kmsg: %s\n",
					strerror(errno));
				err = -1;
				goto cleanup;
			}
			kmsg_buf[n] = '\0';

			/* Skip continuation lines (multiline messages) */
			if (kmsg_buf[0] == ' ')
				continue;

			int level;
			unsigned long long ts_us;
			char msg[KMSG_BUF_SIZE];

			if (parse_kmsg(kmsg_buf, &level, &ts_us,
				       msg, sizeof(msg)) < 0)
				continue;

			/* Only KERN_ERR messages are of interest */
			if (level != KERN_ERR_LEVEL)
				continue;

			/* Correlate with a cached BPF event */
			struct bpf_cached_event bpf_ev;
			unsigned long long ts_ns = ts_us * 1000ULL;
			int matched = cache_match(ts_ns, &bpf_ev);

			/* kmsg timestamp is µs since boot; split for display */
			unsigned long long secs = ts_us / 1000000ULL;
			unsigned long long usecs = ts_us % 1000000ULL;

			if (matched) {
				printf("[%5llu.%06llu] %-60s (pid=%u %s)\n",
				       secs, usecs, msg,
				       bpf_ev.pid, bpf_ev.comm);
			} else {
				/* No BPF context available for this record */
				printf("[%5llu.%06llu] %s\n",
				       secs, usecs, msg);
			}
			fflush(stdout);
		}
	}

cleanup:
	if (kmsg_fd >= 0)
		close(kmsg_fd);
	ring_buffer__free(rb);
	kern_error_monitor_bpf__destroy(skel);
	return err < 0 ? 1 : 0;
}