Skip to content

Commit 4649696

Browse files
committed
Add support for perf event group management in BPF
- Introduced `group_fd` field in the perf options structure to allow attaching BPF programs to a group of perf events. - Updated the `ks_open_perf_event` function to accept `group_fd` and handle group event management. - Implemented helper functions for managing active members of perf event groups, ensuring that group leaders cannot be detached while active members exist. - Enhanced the generated code to include necessary checks and structures for handling multiplexed perf events. - Added tests to validate the new group management features and ensure correct code generation for group-related operations.
1 parent 17a520a commit 4649696

9 files changed

Lines changed: 345 additions & 44 deletions

File tree

BUILTINS.md

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ fn main() -> i32 {
9898
- `flags`: Attachment flags (context-dependent)
9999
- Perf event form:
100100
- `handle`: Program handle returned from `load()`
101-
- `opts`: `perf_options` value — only `perf_type` and `perf_config` are required; all other fields have defaults
101+
- `opts`: `perf_options` value — only `perf_type` and `perf_config` are required; all other fields have defaults, including `group_fd=-1`
102102
- `flags`: Must be `0` for perf attaches; nonzero values are rejected
103103

104104
**Return Value:**
@@ -120,6 +120,16 @@ var perf_att = attach(perf_prog, perf_options { perf_type: perf_type_hardware, p
120120
var count = read(perf_att)
121121
detach(perf_att)
122122
detach(perf_prog)
123+
124+
// Grouped perf events: branch joins the group led by cache. Adding a member restarts the whole group from zero.
125+
var cache = attach(perf_prog, perf_options { perf_type: perf_type_hardware, perf_config: cache_misses }, 0)
126+
var branch = attach(perf_prog, perf_options {
127+
perf_type: perf_type_hardware,
128+
perf_config: branch_misses,
129+
group_fd: cache.perf_fd,
130+
}, 0)
131+
detach(branch)
132+
detach(cache)
123133
```
124134

125135
**Context-specific implementations:**
@@ -163,15 +173,17 @@ detach(prog) // Clean up
163173
**Variadic:** No
164174
**Context:** Userspace only
165175

166-
**Description:** Read the current hardware/software counter value from a perf attachment.
176+
**Description:** Read the current hardware/software counter value from a perf attachment. If the kernel multiplexed the event, the value is scaled with `time_enabled / time_running`.
167177

168178
**Parameters:**
169179
- `handle`: Perf attachment returned from `attach(handle, perf_options, flags)`
170180

171181
**Return Value:**
172-
- Returns the raw 64-bit counter value on success
182+
- Returns the raw 64-bit counter value when no multiplexing occurred
183+
- Returns a scaled value when `time_running < time_enabled`
173184
- Returns `-1` on invalid/stale attachment or read failure
174185
- Reads use the attachment's `perf_fd` directly; the internal token detects copied handles used after detach.
186+
- Group snapshot reads are not supported yet; read grouped attachments individually.
175187

176188
---
177189

README.md

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,7 @@ fn on_branch_miss(ctx: *bpf_perf_event_data) -> i32 {
306306
fn main() -> i32 {
307307
var prog = load(on_branch_miss)
308308
309-
// Minimal form — defaults: pid=-1 (all procs), cpu=0,
309+
// Minimal form — defaults: pid=-1 (all procs), cpu=0, group_fd=-1,
310310
// period=1_000_000, wakeup=1; perf attach flags must be 0
311311
var att = attach(prog, perf_options { perf_type: perf_type_hardware, perf_config: branch_misses }, 0)
312312
var count = read(att)
@@ -318,6 +318,19 @@ fn main() -> i32 {
318318
}
319319
```
320320

321+
Perf events can share a kernel scheduling group by passing the leader attachment's `perf_fd` as `group_fd`:
322+
323+
```kernelscript
324+
var cache = attach(prog, perf_options { perf_type: perf_type_hardware, perf_config: cache_misses }, 0)
325+
var branch = attach(prog, perf_options {
326+
perf_type: perf_type_hardware,
327+
perf_config: branch_misses,
328+
group_fd: cache.perf_fd,
329+
}, 0)
330+
```
331+
332+
Adding a member restarts the whole group from zero. Detach members before detaching their leader. `read(att)` still reads one attachment at a time; it returns a multiplex-scaled count when the kernel reports `time_running < time_enabled`. Group snapshot reads are not part of this first-stage API.
333+
321334
**Available `perf_type` values:**
322335

323336
| Enum value | Hardware/software event |

SPEC.md

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -461,7 +461,7 @@ fn main() -> i32 {
461461
var prog = load(my_handler)
462462
463463
// Only perf_type + perf_config are required; all other fields use language-level defaults:
464-
// pid=-1, cpu=0, period=1_000_000, wakeup=1, inherit/exclude_*=false
464+
// pid=-1, cpu=0, group_fd=-1, period=1_000_000, wakeup=1, inherit/exclude_*=false
465465
var misses = attach(prog, perf_options { perf_type: perf_type_hardware, perf_config: branch_misses }, 0)
466466
467467
// Override specific fields as needed:
@@ -473,8 +473,17 @@ fn main() -> i32 {
473473
exclude_kernel: true,
474474
}, 0)
475475
476-
print("misses=%lld cache=%lld", read(misses), read(cache))
476+
// Put branch misses in cache's perf event group. Adding a member restarts
477+
// the whole group from zero.
478+
var branch = attach(prog, perf_options {
479+
perf_type: perf_type_hardware,
480+
perf_config: branch_misses,
481+
group_fd: cache.perf_fd,
482+
}, 0)
483+
484+
print("misses=%lld cache=%lld branch=%lld", read(misses), read(cache), read(branch))
477485
486+
detach(branch)
478487
detach(cache) // IOC_DISABLE → bpf_link__destroy → close(perf_fd)
479488
detach(misses)
480489
detach(prog)
@@ -490,6 +499,7 @@ fn main() -> i32 {
490499
| `perf_config` | `u64` | *(required)* | `perf_event_attr.config` value for that type |
491500
| `pid` | `i32` | `-1` | -1 = all processes; ≥0 = specific PID |
492501
| `cpu` | `i32` | `0` | ≥0 = specific CPU; -1 = any CPU (pid must be ≥0) |
502+
| `group_fd` | `i32` | `-1` | -1 = standalone event; ≥0 = perf group leader fd |
493503
| `period` | `u64` | `1000000` | Sample after this many events |
494504
| `wakeup` | `u32` | `1` | Wake userspace after N samples |
495505
| `inherit` | `bool` | `false` | Inherit to forked children |
@@ -538,16 +548,29 @@ For event families with a richer config space, such as `perf_type_hw_cache`, pro
538548
|---|---|---|
539549
| `ks_open_perf_event` | `int (ks_perf_options)` | Calls `perf_event_open(2)`, returns fd |
540550
| `ks_attach_perf_event` | `PerfAttachment (int prog_fd, ks_perf_options, int flags)` | Full open-reset-attach-enable lifecycle |
541-
| `ks_read_perf_count` | `int64_t (int perf_fd)` | Reads current 64-bit counter via `read()` |
551+
| `ks_read_perf_count` | `int64_t (int perf_fd)` | Reads current counter and applies multiplex scaling when needed |
542552
| `ks_perf_attachment_read` | `int64_t (PerfAttachment)` | Direct fd read through the attachment value with stale-handle detection |
543553

544-
**Attach sequence (compiler-generated, inside `ks_attach_perf_event`):**
554+
**Attach sequence for standalone events (compiler-generated, inside `ks_attach_perf_event`):**
545555
1. `ks_attr.attr.disabled = 1` — open counter without starting it
546-
2. `syscall(SYS_perf_event_open, ...)` → `perf_fd`
556+
2. `syscall(SYS_perf_event_open, ..., group_fd=-1, ...)` → `perf_fd`
547557
3. `ioctl(perf_fd, PERF_EVENT_IOC_RESET, 0)` — zero the counter
548558
4. `bpf_program__attach_perf_event(prog, perf_fd)` — link BPF program
549559
5. `ioctl(perf_fd, PERF_EVENT_IOC_ENABLE, 0)` — **start counting**
550560

561+
**Perf event groups:**
562+
- `group_fd >= 0` opens the new event as a member of that leader fd.
563+
- Group members are opened disabled, linked to the BPF program, then the leader is disabled, reset, and enabled with `PERF_IOC_FLAG_GROUP`.
564+
- Adding a member to an already running group restarts the whole group from zero.
565+
- Detaching a member is allowed. Detaching a leader while live members reference it is rejected; detach members first.
566+
- Group snapshot reads are not implemented yet; read each `PerfAttachment` separately.
567+
568+
**Counter reads:**
569+
- Generated perf events request `PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING`.
570+
- `read(att)` returns the raw value when `time_enabled == time_running`.
571+
- If multiplexing occurred, `read(att)` returns `value * time_enabled / time_running` using a 128-bit intermediate.
572+
- If `time_running == 0`, `read(att)` reports an error and returns `-1`.
573+
551574
**Detach sequence (compiler-generated):**
552575
1. `ioctl(perf_fd, PERF_EVENT_IOC_DISABLE, 0)` — stop counting
553576
2. `bpf_link__destroy(link)` — unlink BPF program
@@ -559,7 +582,7 @@ For event families with a richer config space, such as `perf_type_hw_cache`, pro
559582
- Returns a first-class `PerfAttachment` value for perf attaches so one program can hold multiple live counters
560583
- `PerfAttachment` carries `perf_fd` plus an internal generation token; `read(attachment)` avoids global attachment-list scans and rejects copied handles after detach
561584
- Exposes omitted `perf_options` fields as language-level defaults (partial struct literal)
562-
- Validates `pid ≥ -1`, `cpu ≥ -1`, and rejects `pid == -1 && cpu == -1` at runtime
585+
- Validates `pid ≥ -1`, `cpu ≥ -1`, `group_fd ≥ -1`, and rejects `pid == -1 && cpu == -1` at runtime
563586
- Emits `PERF_FLAG_FD_CLOEXEC` for safe fd inheritance
564587
- BPF program section is `SEC("perf_event")`
565588

examples/perf_cache_miss.ks

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,19 +11,20 @@ fn on_cache_miss(ctx: *bpf_perf_event_data) -> i32 {
1111
fn main() -> i32 {
1212
var prog = load(on_cache_miss)
1313

14-
// Only perf_type + perf_config are required; pid, cpu, period, wakeup and flag fields
14+
// Only perf_type + perf_config are required; pid, cpu, group_fd, period, wakeup and flag fields
1515
// default to: pid=-1 (all procs), cpu=0, period=1_000_000, wakeup=1,
16-
// inherit/exclude_kernel/exclude_user=false.
16+
// group_fd=-1, inherit/exclude_kernel/exclude_user=false.
1717
var cache = attach(prog, perf_options { perf_type: perf_type_hardware, perf_config: cache_misses, period: 10000000, inherit: true }, 0)
18-
var branch = attach(prog, perf_options { perf_type: perf_type_hardware, perf_config: branch_misses, period: 10000000, inherit: true }, 0)
18+
// branch joins cache's perf event group. Adding a member restarts the whole group from zero.
19+
var branch = attach(prog, perf_options { perf_type: perf_type_hardware, perf_config: branch_misses, period: 10000000, inherit: true, group_fd: cache.perf_fd }, 0)
1920
print("Cache-miss and branch-miss perf_event demo attached")
2021
var cache_count = read(cache)
2122
print("Cache-miss count: %lld", cache_count)
2223
var branch_count = read(branch)
2324
print("Branch-miss count: %lld", branch_count)
2425

25-
detach(cache)
2626
detach(branch)
27+
detach(cache)
2728
detach(prog)
2829
print("Cache-miss and branch-miss perf_event demo detached")
2930
return 0

examples/perf_page_fault.ks

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,20 +14,26 @@ fn main() -> i32 {
1414
// pid: 0 = current process, cpu: -1 = any CPU (standard per-process monitoring).
1515
// page_faults (PERF_COUNT_SW_PAGE_FAULTS) is the most reliable software event:
1616
// every heap/stack allocation triggers minor page faults, no scheduler dependency.
17-
var att = attach(prog, perf_options { perf_type: perf_type_software, perf_config: page_faults, pid: 0, cpu: -1, period: 1 }, 0)
18-
print("Page-fault perf_event demo attached")
17+
var page = attach(prog, perf_options { perf_type: perf_type_software, perf_config: page_faults, pid: 0, cpu: -1, period: 1 }, 0)
18+
// branch is a separate, standalone hardware counter: group_fd is omitted and defaults to -1, so it does not join page's group.
19+
var branch = attach(prog, perf_options { perf_type: perf_type_hardware, perf_config: branch_misses, period: 10000000, inherit: true}, 0)
20+
21+
print("perf_event demo attached")
1922

2023
// Repeatedly increment a counter; stack/heap activity will generate page faults.
2124
var x: i64 = 0
2225
for (i in 0..10000000) {
2326
x = x + 1
2427
}
2528

26-
var count = read(att)
27-
print("Page-fault count: %lld", count)
29+
var page_fault_count = read(page)
30+
print("Page-fault count: %lld", page_fault_count)
31+
var branch_count = read(branch)
32+
print("Branch-miss count: %lld", branch_count)
2833

29-
detach(att)
30-
print("Page-fault perf_event demo detached")
34+
detach(page)
35+
detach(branch)
36+
print("perf_event demo detached")
3137
detach(prog)
3238
return 0
3339
}

src/stdlib.ml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,7 @@ let builtin_types = [
349349
("perf_config", U64);
350350
("pid", I32);
351351
("cpu", I32);
352+
("group_fd", I32);
352353
("period", U64);
353354
("wakeup", U32);
354355
("inherit", Bool);
@@ -374,6 +375,7 @@ let get_struct_field_defaults = function
374375
Some [
375376
("pid", IntLit (Signed64 (-1L), None));
376377
("cpu", IntLit (Signed64 0L, None));
378+
("group_fd", IntLit (Signed64 (-1L), None));
377379
("period", IntLit (Unsigned64 1000000L, None));
378380
("wakeup", IntLit (Unsigned64 1L, None));
379381
("inherit", BoolLit false);

0 commit comments

Comments
 (0)