Skip to content

Commit 7b34ded

Browse files
committed
support page fault
Signed-off-by: yaofighting <siyao@zju.edu.cn>
1 parent 5afa674 commit 7b34ded

15 files changed

Lines changed: 317 additions & 18 deletions

File tree

driver/bpf/fillers.h

Lines changed: 55 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3898,6 +3898,13 @@ FILLER(sys_pagefault_e, false)
38983898
unsigned long error_code;
38993899
unsigned long address;
39003900
unsigned long ip;
3901+
struct task_struct *task;
3902+
unsigned long total_vm;
3903+
unsigned long maj_flt;
3904+
unsigned long min_flt;
3905+
struct mm_struct *mm;
3906+
long total_rss;
3907+
long swap;
39013908
u32 flags;
39023909
int res;
39033910

@@ -3914,17 +3921,61 @@ FILLER(sys_pagefault_e, false)
39143921
error_code = ctx->error_code;
39153922
#endif
39163923

3917-
res = bpf_val_to_ring(data, address);
3924+
3925+
3926+
task = (struct task_struct *)bpf_get_current_task();
3927+
3928+
3929+
/*
3930+
* pgft_maj
3931+
*/
3932+
maj_flt = _READ(task->maj_flt);
3933+
res = bpf_val_to_ring_type(data, maj_flt, PT_UINT64);
39183934
if (res != PPM_SUCCESS)
39193935
return res;
39203936

3921-
res = bpf_val_to_ring(data, ip);
3937+
/*
3938+
* pgft_min
3939+
*/
3940+
min_flt = _READ(task->min_flt);
3941+
res = bpf_val_to_ring_type(data, min_flt, PT_UINT64);
39223942
if (res != PPM_SUCCESS)
39233943
return res;
39243944

3925-
flags = pf_flags_to_scap(error_code);
3926-
res = bpf_val_to_ring(data, flags);
3945+
total_vm = 0;
3946+
total_rss = 0;
3947+
swap = 0;
3948+
3949+
mm = _READ(task->mm);
3950+
if (mm) {
3951+
total_vm = _READ(mm->total_vm);
3952+
total_vm <<= (PAGE_SHIFT - 10);
3953+
total_rss = bpf_get_mm_rss(mm) << (PAGE_SHIFT - 10);
3954+
swap = bpf_get_mm_swap(mm) << (PAGE_SHIFT - 10);
3955+
}
3956+
3957+
/*
3958+
* vm_size
3959+
*/
3960+
res = bpf_val_to_ring_type(data, total_vm, PT_UINT32);
3961+
if (res != PPM_SUCCESS)
3962+
return res;
3963+
3964+
/*
3965+
* vm_rss
3966+
*/
3967+
res = bpf_val_to_ring_type(data, total_rss, PT_UINT32);
3968+
if (res != PPM_SUCCESS)
3969+
return res;
3970+
3971+
/*
3972+
* vm_swap
3973+
*/
3974+
res = bpf_val_to_ring_type(data, swap, PT_UINT32);
39273975

3976+
pid_t tid = _READ(task->pid);
3977+
int map_res = bpf_map_update_elem(&pgft_major_map, &tid, &maj_flt, BPF_ANY);
3978+
if(map_res != 0) return PPM_MAP_FAILURE;
39283979
return res;
39293980
}
39303981

driver/bpf/maps.h

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -90,14 +90,6 @@ struct bpf_map_def __bpf_section("maps") local_state_map = {
9090
.max_entries = 0,
9191
};
9292

93-
#ifndef BPF_SUPPORTS_RAW_TRACEPOINTS
94-
struct bpf_map_def __bpf_section("maps") stash_map = {
95-
.type = BPF_MAP_TYPE_HASH,
96-
.key_size = sizeof(u64),
97-
.value_size = sizeof(struct sys_stash_args),
98-
.max_entries = 65535,
99-
};
100-
#endif
10193

10294
struct bpf_map_def __bpf_section("maps") rtt_static_map = {
10395
.type = BPF_MAP_TYPE_HASH,
@@ -113,6 +105,22 @@ struct bpf_map_def __bpf_section("maps") stash_tuple_map = {
113105
.max_entries = 65535,
114106
};
115107

108+
/*
 * Per-thread major page-fault bookkeeping shared by the probe and userspace.
 *
 * Keys are thread ids (pid_t); each value is the thread's maj_flt count as
 * last stored by the sys_pagefault_e filler. bpf_page_fault compares the
 * current maj_flt against this value to decide whether to emit an event.
 *
 * Key -1 is reserved: it holds a counter that bpf_page_fault increments when
 * it meets a tid with no entry yet (only if the -1 slot already exists), and
 * that userspace resets to 0 after clearing the map.
 */
struct bpf_map_def __bpf_section("maps") pgft_major_map = {
109+
.type = BPF_MAP_TYPE_HASH,
110+
.key_size = sizeof(pid_t),
111+
.value_size = sizeof(u64),
112+
.max_entries = 1048576,
113+
};
114+
115+
#ifndef BPF_SUPPORTS_RAW_TRACEPOINTS
116+
/*
 * Hash map with u64 keys and struct sys_stash_args values, defined only when
 * BPF_SUPPORTS_RAW_TRACEPOINTS is not defined.
 * NOTE(review): presumably stashes syscall arguments between enter and exit
 * probes on kernels without raw tracepoints -- confirm against the users of
 * this map.
 */
struct bpf_map_def __bpf_section("maps") stash_map = {
117+
.type = BPF_MAP_TYPE_HASH,
118+
.key_size = sizeof(u64),
119+
.value_size = sizeof(struct sys_stash_args),
120+
.max_entries = 65535,
121+
};
122+
#endif
123+
116124
enum offcpu_type {
117125
ON, // 0
118126
DISK, // 1

driver/bpf/probe.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,29 @@ static __always_inline int bpf_page_fault(struct page_fault_args *ctx)
409409

410410
if (!settings->capture_enabled)
411411
return 0;
412+
413+
if(settings->pgft_map_clear)
414+
return 0;
415+
416+
struct task_struct *task = (struct task_struct *)bpf_get_current_task();
417+
unsigned long maj_flt = _READ(task->maj_flt);
418+
if(maj_flt == 0){
419+
return 0;
420+
}
421+
pid_t tid = _READ(task->pid);
422+
unsigned long *last_maj = bpf_map_lookup_elem(&pgft_major_map, &tid);
423+
if(last_maj && *last_maj == maj_flt){
424+
return 0;
425+
}
426+
427+
if(!last_maj){
428+
int key = -1;
429+
unsigned long *page_faults_threads_number = bpf_map_lookup_elem(&pgft_major_map, &key);
430+
if(page_faults_threads_number){
431+
(*page_faults_threads_number)++;
432+
bpf_map_update_elem(&pgft_major_map, &key, page_faults_threads_number, BPF_ANY);
433+
}
434+
}
412435

413436
evt_type = PPME_PAGE_FAULT_E;
414437

driver/bpf/types.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -261,10 +261,13 @@ enum sysdig_map_types {
261261
SYSDIG_TMP_SCRATCH_MAP = 7,
262262
SYSDIG_SETTINGS_MAP = 8,
263263
SYSDIG_LOCAL_STATE_MAP = 9,
264+
SYSDIG_RTT_STATISTICS = 10,
265+
SYSDIG_STASH_TUPLE_MAP = 11,
266+
SYSDIG_PAGEFAULT_MAJOR_MAP = 12,
264267
#ifndef BPF_SUPPORTS_RAW_TRACEPOINTS
265-
SYSDIG_STASH_MAP = 10,
266-
SYSDIG_RTT_STATISTICS = 11,
268+
SYSDIG_STASH_MAP = 13,
267269
#endif
270+
268271
};
269272

270273
struct sysdig_bpf_settings {
@@ -275,6 +278,7 @@ struct sysdig_bpf_settings {
275278
bool capture_enabled;
276279
bool do_dynamic_snaplen;
277280
bool page_faults;
281+
bool pgft_map_clear;
278282
bool dropping_mode;
279283
bool is_dropping;
280284
bool tracers_enabled;

driver/event_table.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,7 @@ const struct ppm_event_info g_event_info[PPM_EVENT_MAX] = {
302302
/* PPME_INFRASTRUCTURE_EVENT_X */{"NA4", EC_SYSTEM, EF_UNUSED, 0},
303303
/* PPME_SYSCALL_EXECVE_18_E */{"execve", EC_PROCESS, EF_MODIFIES_STATE | EF_OLD_VERSION, 1, {{"filename", PT_FSPATH, PF_NA} } },
304304
/* PPME_SYSCALL_EXECVE_18_X */{"execve", EC_PROCESS, EF_MODIFIES_STATE | EF_OLD_VERSION, 17, {{"res", PT_ERRNO, PF_DEC}, {"exe", PT_CHARBUF, PF_NA}, {"args", PT_BYTEBUF, PF_NA}, {"tid", PT_PID, PF_DEC}, {"pid", PT_PID, PF_DEC}, {"ptid", PT_PID, PF_DEC}, {"cwd", PT_CHARBUF, PF_NA}, {"fdlimit", PT_UINT64, PF_DEC}, {"pgft_maj", PT_UINT64, PF_DEC}, {"pgft_min", PT_UINT64, PF_DEC}, {"vm_size", PT_UINT32, PF_DEC}, {"vm_rss", PT_UINT32, PF_DEC}, {"vm_swap", PT_UINT32, PF_DEC}, {"comm", PT_CHARBUF, PF_NA}, {"cgroups", PT_BYTEBUF, PF_NA}, {"env", PT_BYTEBUF, PF_NA}, {"tty", PT_INT32, PF_DEC} } },
305-
/* PPME_PAGE_FAULT_E */ {"page_fault", EC_OTHER, EF_SKIPPARSERESET | EF_DROP_SIMPLE_CONS, 3, {{"addr", PT_UINT64, PF_HEX}, {"ip", PT_UINT64, PF_HEX}, {"error", PT_FLAGS32, PF_HEX, pf_flags} } },
305+
/* PPME_PAGE_FAULT_E */ {"page_fault", EC_OTHER, EF_SKIPPARSERESET | EF_DROP_SIMPLE_CONS, 5, {{"pgft_maj", PT_UINT64, PF_DEC}, {"pgft_min", PT_UINT64, PF_DEC}, {"vm_size", PT_UINT32, PF_DEC}, {"vm_rss", PT_UINT32, PF_DEC}, {"vm_swap", PT_UINT32, PF_DEC} } },
306306
/* PPME_PAGE_FAULT_X */ {"NA5", EC_OTHER, EF_UNUSED, 0},
307307
/* PPME_SYSCALL_EXECVE_19_E */{"execve", EC_PROCESS, EF_MODIFIES_STATE, 1, {{"filename", PT_FSPATH, PF_NA} } },
308308
/* PPME_SYSCALL_EXECVE_19_X */{"execve", EC_PROCESS, EF_MODIFIES_STATE, 19, {{"res", PT_ERRNO, PF_DEC}, {"exe", PT_CHARBUF, PF_NA}, {"args", PT_BYTEBUF, PF_NA}, {"tid", PT_PID, PF_DEC}, {"pid", PT_PID, PF_DEC}, {"ptid", PT_PID, PF_DEC}, {"cwd", PT_CHARBUF, PF_NA}, {"fdlimit", PT_UINT64, PF_DEC}, {"pgft_maj", PT_UINT64, PF_DEC}, {"pgft_min", PT_UINT64, PF_DEC}, {"vm_size", PT_UINT32, PF_DEC}, {"vm_rss", PT_UINT32, PF_DEC}, {"vm_swap", PT_UINT32, PF_DEC}, {"comm", PT_CHARBUF, PF_NA}, {"cgroups", PT_BYTEBUF, PF_NA}, {"env", PT_BYTEBUF, PF_NA}, {"tty", PT_INT32, PF_DEC}, {"pgid", PT_PID, PF_DEC}, {"loginuid", PT_INT32, PF_DEC} } },

driver/ppm_events_public.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1663,6 +1663,7 @@ struct ppm_event_entry {
16631663
#define PPM_FAILURE_INVALID_USER_MEMORY -2
16641664
#define PPM_FAILURE_BUG -3
16651665
#define PPM_SKIP_EVENT -4
1666+
#define PPM_MAP_FAILURE -5
16661667

16671668
#define RW_SNAPLEN 80
16681669
#define RW_MAX_SNAPLEN PPM_MAX_ARG_SIZE

userspace/libscap/scap.c

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ limitations under the License.
3535
#endif // _WIN32
3636

3737
#include "scap.h"
38+
#include "../../driver/bpf/types.h"
3839
#ifdef HAS_CAPTURE
3940
#if !defined(_WIN32) && !defined(CYGWING_AGENT)
4041
#include "driver_config.h"
@@ -1918,6 +1919,32 @@ int32_t scap_enable_tracers_capture(scap_t* handle)
19181919
}
19191920
#endif
19201921

1922+
/*
 * Return the page-fault thread counter kept by the eBPF probe (the value
 * stored under key -1 of the page-fault map).
 *
 * The counter only exists when the handle is driven by the eBPF probe, so any
 * other handle (or a build without live-capture support) gets -1 with
 * m_lasterr set, instead of reading map file descriptors that were never
 * opened.
 *
 * Returns the counter value, or -1 on error.
 */
int scap_get_pagefaults_threads_number(scap_t *handle)
{
#if defined(HAS_CAPTURE) && !defined(CYGWING_AGENT) && !defined(_WIN32)
	if(handle->m_bpf)
	{
		return scap_bpf_get_pagefault_threads_number(handle);
	}
#endif

	snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, "scap_get_pagefaults_threads_number is only supported with the eBPF probe");
	return -1;
}
1925+
1926+
/*
 * Store `val` under key `tid` in the eBPF page-fault map.
 *
 * The map only exists when the handle is driven by the eBPF probe, so any
 * other handle (or a build without live-capture support) fails with
 * m_lasterr set, instead of using map file descriptors that were never
 * opened.
 *
 * Returns SCAP_SUCCESS on success, SCAP_FAILURE otherwise.
 */
int32_t scap_update_pagefaults_thread_number(scap_t *handle, int tid, unsigned long val)
{
#if defined(HAS_CAPTURE) && !defined(CYGWING_AGENT) && !defined(_WIN32)
	if(handle->m_bpf)
	{
		return scap_bpf_update_pagefaults_threads_number(handle, tid, val);
	}
#endif

	snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, "scap_update_pagefaults_thread_number is only supported with the eBPF probe");
	return SCAP_FAILURE;
}
1929+
1930+
#if defined(HAS_CAPTURE) && ! defined(CYGWING_AGENT) && ! defined(_WIN32)
1931+
/*
 * Empty the eBPF page-fault map of a live capture.
 *
 * Returns SCAP_FAILURE (with m_lasterr set) when the handle is not in live
 * mode, the result of scap_bpf_clear_pagefault_map() when the handle has
 * devices and uses the eBPF probe, and SCAP_SUCCESS otherwise because there
 * is no map to clear.
 */
int32_t scap_pagefaults_map_clear(scap_t *handle)
{
	if(handle->m_mode != SCAP_MODE_LIVE)
	{
		snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, "scap_pagefaults_map_clear not supported on this scap mode");
		ASSERT(false);
		return SCAP_FAILURE;
	}

	if(handle->m_ndevs && handle->m_bpf)
	{
		return scap_bpf_clear_pagefault_map(handle);
	}

	/*
	 * No devices, or not running on the eBPF probe: nothing to clear.
	 * Previously control fell off the end of this non-void function here,
	 * handing the caller an indeterminate value.
	 */
	return SCAP_SUCCESS;
}
1946+
#endif
1947+
19211948
#if defined(HAS_CAPTURE) && ! defined(CYGWING_AGENT) && ! defined(_WIN32)
19221949
int32_t scap_enable_page_faults(scap_t *handle)
19231950
{

userspace/libscap/scap.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1068,6 +1068,9 @@ uint64_t scap_ftell(scap_t *handle);
10681068
void scap_fseek(scap_t *handle, uint64_t off);
10691069
int32_t scap_enable_tracers_capture(scap_t* handle);
10701070
int32_t scap_enable_page_faults(scap_t *handle);
1071+
int scap_get_pagefaults_threads_number(scap_t *handle);
1072+
int32_t scap_update_pagefaults_thread_number(scap_t *handle, int tid, unsigned long val);
1073+
int32_t scap_pagefaults_map_clear(scap_t *handle);
10711074
int32_t scap_enable_skb_capture(scap_t *handle);
10721075
int32_t scap_disable_skb_capture(scap_t *handle);
10731076
uint64_t scap_get_unexpected_block_readsize(scap_t* handle);

userspace/libscap/scap_bpf.c

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,30 @@ static int bpf_map_update_elem(int fd, const void *key, const void *value, uint6
116116
return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
117117
}
118118

119+
static int bpf_map_get_next_key(int fd, void *key, void *next_key){
120+
union bpf_attr attr;
121+
122+
bzero(&attr, sizeof(attr));
123+
124+
attr.map_fd = fd;
125+
attr.key = (unsigned long) key;
126+
attr.next_key = (unsigned long) next_key;
127+
128+
return sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
129+
}
130+
131+
static int bpf_map_delete_elem(int fd, const void *key)
132+
{
133+
union bpf_attr attr;
134+
135+
bzero(&attr, sizeof(attr));
136+
137+
attr.map_fd = fd;
138+
attr.key = (unsigned long) key;
139+
140+
return sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
141+
}
142+
119143
static int bpf_map_lookup_elem(int fd, const void *key, void *value)
120144
{
121145
union bpf_attr attr;
@@ -1220,7 +1244,71 @@ int32_t scap_bpf_enable_dynamic_snaplen(scap_t* handle)
12201244

12211245
return SCAP_SUCCESS;
12221246
}
1247+
/*
 * Remove every entry from the page-fault map and reset the thread counter
 * stored under key -1 to zero.
 *
 * While the map is being rewritten, settings.pgft_map_clear is set so that
 * the page-fault probe returns early and does not touch the map. The counter
 * is reset *before* the flag is lowered, so the probe never observes a
 * cleared map without its counter slot. The flag is lowered even when the
 * counter reset fails, so the probe is not left muted.
 *
 * Returns SCAP_SUCCESS on success; SCAP_FAILURE with m_lasterr set otherwise.
 */
int32_t scap_bpf_clear_pagefault_map(scap_t* handle)
{
	struct sysdig_bpf_settings settings;
	int settings_fd = handle->m_bpf_map_fds[SYSDIG_SETTINGS_MAP];
	int pgft_fd = handle->m_bpf_map_fds[SYSDIG_PAGEFAULT_MAJOR_MAP];
	int settings_key = 0;
	int counter_key = -1;
	int lookup_key;
	int next_key;
	/* The map declares value_size = sizeof(u64); always hand it 8 bytes. */
	uint64_t zero = 0;
	int32_t res = SCAP_SUCCESS;

	if(bpf_map_lookup_elem(settings_fd, &settings_key, &settings) != 0)
	{
		snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, "SYSDIG_SETTINGS_MAP bpf_map_lookup_elem < 0");
		return SCAP_FAILURE;
	}

	/* Raise the flag: the probe skips page-fault handling while it is set. */
	settings.pgft_map_clear = true;
	if(bpf_map_update_elem(settings_fd, &settings_key, &settings, BPF_ANY) != 0)
	{
		snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, "SYSDIG_SETTINGS_MAP bpf_map_update_elem < 0");
		return SCAP_FAILURE;
	}

	/*
	 * Walk the map deleting as we go. After a delete the lookup key no
	 * longer exists, so the next BPF_MAP_GET_NEXT_KEY restarts from the
	 * first remaining element; the loop ends once the map reports no
	 * further key. Delete failures are tolerated (best effort).
	 */
	lookup_key = counter_key;
	while(bpf_map_get_next_key(pgft_fd, &lookup_key, &next_key) == 0)
	{
		bpf_map_delete_elem(pgft_fd, &next_key);
		lookup_key = next_key;
	}

	/* Re-create the counter slot while the probe is still held off. */
	if(bpf_map_update_elem(pgft_fd, &counter_key, &zero, BPF_ANY) != 0)
	{
		snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, "SYSDIG_PAGEFAULT_MAJOR_MAP bpf_map_update_elem < 0");
		res = SCAP_FAILURE;
	}

	/* Lower the flag regardless of the counter result. */
	settings.pgft_map_clear = false;
	if(bpf_map_update_elem(settings_fd, &settings_key, &settings, BPF_ANY) != 0)
	{
		snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, "SYSDIG_SETTINGS_MAP bpf_map_update_elem < 0");
		return SCAP_FAILURE;
	}

	return res;
}
1293+
/*
 * Read the thread counter stored under key -1 of the page-fault map.
 *
 * Returns the counter (narrowed to int), or -1 with m_lasterr set when the
 * lookup fails.
 */
int scap_bpf_get_pagefault_threads_number(scap_t* handle)
{
	int counter_key = -1;
	/*
	 * The map declares value_size = sizeof(u64), so the kernel writes 8
	 * bytes here. An `unsigned long` buffer is only 4 bytes on ILP32 targets.
	 */
	uint64_t val = 0;

	if(bpf_map_lookup_elem(handle->m_bpf_map_fds[SYSDIG_PAGEFAULT_MAJOR_MAP], &counter_key, &val) != 0)
	{
		snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, "SYSDIG_PAGEFAULT_MAJOR_MAP bpf_map_lookup_elem < 0");
		return -1;
	}

	return (int)val;
}
1304+
/*
 * Store `val` under key `tid` in the page-fault map, creating or overwriting
 * the entry (BPF_ANY).
 *
 * Returns SCAP_SUCCESS, or SCAP_FAILURE with m_lasterr set when the update
 * fails.
 */
int32_t scap_bpf_update_pagefaults_threads_number(scap_t* handle, int tid, unsigned long val)
{
	/*
	 * The map declares value_size = sizeof(u64); widen explicitly so the
	 * kernel always reads 8 valid bytes, even where unsigned long is 32-bit.
	 */
	uint64_t map_val = val;

	if(bpf_map_update_elem(handle->m_bpf_map_fds[SYSDIG_PAGEFAULT_MAJOR_MAP], &tid, &map_val, BPF_ANY) != 0)
	{
		snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, "SYSDIG_PAGEFAULT_MAJOR_MAP bpf_map_update_elem < 0");
		return SCAP_FAILURE;
	}

	return SCAP_SUCCESS;
}
12241312
int32_t scap_bpf_enable_page_faults(scap_t* handle)
12251313
{
12261314
struct sysdig_bpf_settings settings;
@@ -1443,6 +1531,7 @@ static int32_t set_default_settings(scap_t *handle)
14431531
settings.is_dropping = false;
14441532
settings.tracers_enabled = false;
14451533
settings.skb_capture = false;
1534+
settings.pgft_map_clear = false;
14461535
settings.fullcapture_port_range_start = 0;
14471536
settings.fullcapture_port_range_end = 0;
14481537
settings.statsd_port = 8125;

userspace/libscap/scap_bpf.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,10 @@ int32_t scap_bpf_set_fullcapture_port_range(scap_t* handle, uint16_t range_start
4040
int32_t scap_bpf_set_statsd_port(scap_t* handle, uint16_t port);
4141
int32_t scap_bpf_enable_dynamic_snaplen(scap_t* handle);
4242
int32_t scap_bpf_disable_dynamic_snaplen(scap_t* handle);
43+
int32_t scap_bpf_clear_pagefault_map(scap_t* handle);
4344
int32_t scap_bpf_enable_page_faults(scap_t* handle);
45+
int scap_bpf_get_pagefault_threads_number(scap_t* handle);
46+
int32_t scap_bpf_update_pagefaults_threads_number(scap_t* handle, int tid, unsigned long val);
4447
int32_t scap_bpf_start_dropping_mode(scap_t* handle, uint32_t sampling_ratio);
4548
int32_t scap_bpf_stop_dropping_mode(scap_t* handle);
4649
int32_t scap_bpf_enable_tracers_capture(scap_t* handle);

0 commit comments

Comments
 (0)