Skip to content

Commit 4eb7562

Browse files
author
Miklos Szeredi
committed
fanotify: add watchdog for permission events
JIRA: https://issues.redhat.com/browse/RHEL-44601

```
commit b8cf8fd
Author: Miklos Szeredi <mszeredi@redhat.com>
Date: Tue Sep 9 16:30:47 2025 +0200

fanotify: add watchdog for permission events

This is to make it easier to debug issues with AV software, which time and again deadlocks with no indication of where the issue comes from, and the kernel being blamed for the deadlock. Then we need to analyze dumps to prove that the kernel is not in fact at fault.

The deadlock comes from recursion: handling the event triggers another permission event, in some roundabout way, obviously, otherwise it would have been found in testing.

With this patch a warning is printed when permission event is received by userspace but not answered for more than the timeout specified in /proc/sys/fs/fanotify/watchdog_timeout. The watchdog can be turned off by setting the timeout to zero (which is the default).

The timeout is very coarse (T <= t < 2T) but I guess it's good enough for the purpose.

Overhead should be minimal.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Link: https://patch.msgid.link/20250909143053.112171-1-mszeredi@redhat.com
Signed-off-by: Jan Kara <jack@suse.cz>
```

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
1 parent cb4d865 commit 4eb7562

File tree

3 files changed

+106
-0
lines changed

3 files changed

+106
-0
lines changed

fs/notify/fanotify/fanotify.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,9 @@ struct fanotify_perm_event {
427427
struct path path;
428428
u32 response; /* userspace answer to the event */
429429
unsigned short state; /* state of the event */
430+
unsigned short watchdog_cnt; /* already scanned by watchdog? */
430431
int fd; /* fd we passed to userspace for this event */
432+
pid_t recv_pid; /* pid of task receiving the event */
431433
union {
432434
struct fanotify_response_info_header hdr;
433435
struct fanotify_response_info_audit_rule audit_rule;

fs/notify/fanotify/fanotify_user.c

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151

5252
/* configurable via /proc/sys/fs/fanotify/ */
5353
static int fanotify_max_queued_events __read_mostly;
54+
static int perm_group_timeout __read_mostly;
5455

5556
#ifdef CONFIG_SYSCTL
5657

@@ -86,6 +87,14 @@ static struct ctl_table fanotify_table[] = {
8687
.proc_handler = proc_dointvec_minmax,
8788
.extra1 = SYSCTL_ZERO
8889
},
90+
{
91+
.procname = "watchdog_timeout",
92+
.data = &perm_group_timeout,
93+
.maxlen = sizeof(int),
94+
.mode = 0644,
95+
.proc_handler = proc_dointvec_minmax,
96+
.extra1 = SYSCTL_ZERO,
97+
},
8998
};
9099

91100
static void __init fanotify_sysctls_init(void)
@@ -96,6 +105,91 @@ static void __init fanotify_sysctls_init(void)
96105
#define fanotify_sysctls_init() do { } while (0)
97106
#endif /* CONFIG_SYSCTL */
98107

108+
/*
 * Groups monitored by the permission-event watchdog, chained through
 * fsnotify_group.fanotify_data.perm_grp_list.
 */
static LIST_HEAD(perm_group_list);
109+
/* Held while perm_group_list is walked or modified. */
static DEFINE_SPINLOCK(perm_group_lock);
110+
/*
 * Forward declaration: DECLARE_DELAYED_WORK() below needs the handler
 * before its definition.
 */
static void perm_group_watchdog(struct work_struct *work);
111+
/* Delayed work item that runs perm_group_watchdog(). */
static DECLARE_DELAYED_WORK(perm_group_work, perm_group_watchdog);
112+
113+
static void perm_group_watchdog_schedule(void)
114+
{
115+
schedule_delayed_work(&perm_group_work, secs_to_jiffies(perm_group_timeout));
116+
}
117+
118+
static void perm_group_watchdog(struct work_struct *work)
119+
{
120+
struct fsnotify_group *group;
121+
struct fanotify_perm_event *event;
122+
struct task_struct *task;
123+
pid_t failed_pid = 0;
124+
125+
guard(spinlock)(&perm_group_lock);
126+
if (list_empty(&perm_group_list))
127+
return;
128+
129+
list_for_each_entry(group, &perm_group_list,
130+
fanotify_data.perm_grp_list) {
131+
/*
132+
* Ok to test without lock, racing with an addition is
133+
* fine, will deal with it next round
134+
*/
135+
if (list_empty(&group->fanotify_data.access_list))
136+
continue;
137+
138+
spin_lock(&group->notification_lock);
139+
list_for_each_entry(event, &group->fanotify_data.access_list,
140+
fae.fse.list) {
141+
if (likely(event->watchdog_cnt == 0)) {
142+
event->watchdog_cnt = 1;
143+
} else if (event->watchdog_cnt == 1) {
144+
/* Report on event only once */
145+
event->watchdog_cnt = 2;
146+
147+
/* Do not report same pid repeatedly */
148+
if (event->recv_pid == failed_pid)
149+
continue;
150+
151+
failed_pid = event->recv_pid;
152+
rcu_read_lock();
153+
task = find_task_by_pid_ns(event->recv_pid,
154+
&init_pid_ns);
155+
pr_warn_ratelimited(
156+
"PID %u (%s) failed to respond to fanotify queue for more than %d seconds\n",
157+
event->recv_pid,
158+
task ? task->comm : NULL,
159+
perm_group_timeout);
160+
rcu_read_unlock();
161+
}
162+
}
163+
spin_unlock(&group->notification_lock);
164+
}
165+
perm_group_watchdog_schedule();
166+
}
167+
168+
static void fanotify_perm_watchdog_group_remove(struct fsnotify_group *group)
169+
{
170+
if (!list_empty(&group->fanotify_data.perm_grp_list)) {
171+
/* Perm event watchdog can no longer scan this group. */
172+
spin_lock(&perm_group_lock);
173+
list_del_init(&group->fanotify_data.perm_grp_list);
174+
spin_unlock(&perm_group_lock);
175+
}
176+
}
177+
178+
static void fanotify_perm_watchdog_group_add(struct fsnotify_group *group)
179+
{
180+
if (!perm_group_timeout)
181+
return;
182+
183+
spin_lock(&perm_group_lock);
184+
if (list_empty(&group->fanotify_data.perm_grp_list)) {
185+
/* Add to perm_group_list for monitoring by watchdog. */
186+
if (list_empty(&perm_group_list))
187+
perm_group_watchdog_schedule();
188+
list_add_tail(&group->fanotify_data.perm_grp_list, &perm_group_list);
189+
}
190+
spin_unlock(&perm_group_lock);
191+
}
192+
99193
/*
100194
* All flags that may be specified in parameter event_f_flags of fanotify_init.
101195
*
@@ -854,6 +948,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
854948
spin_lock(&group->notification_lock);
855949
list_add_tail(&event->fse.list,
856950
&group->fanotify_data.access_list);
951+
FANOTIFY_PERM(event)->recv_pid = current->pid;
857952
spin_unlock(&group->notification_lock);
858953
}
859954
}
@@ -913,6 +1008,8 @@ static int fanotify_release(struct inode *ignored, struct file *file)
9131008
*/
9141009
fsnotify_group_stop_queueing(group);
9151010

1011+
fanotify_perm_watchdog_group_remove(group);
1012+
9161013
/*
9171014
* Process all permission events on access_list and notification queue
9181015
* and simulate reply from userspace.
@@ -1365,6 +1462,10 @@ static int fanotify_add_mark(struct fsnotify_group *group,
13651462
fsnotify_group_unlock(group);
13661463

13671464
fsnotify_put_mark(fsn_mark);
1465+
1466+
if (!ret && (mask & FANOTIFY_PERM_EVENTS))
1467+
fanotify_perm_watchdog_group_add(group);
1468+
13681469
return ret;
13691470
}
13701471

@@ -1514,6 +1615,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
15141615
group->fanotify_data.f_flags = event_f_flags;
15151616
init_waitqueue_head(&group->fanotify_data.access_waitq);
15161617
INIT_LIST_HEAD(&group->fanotify_data.access_list);
1618+
INIT_LIST_HEAD(&group->fanotify_data.perm_grp_list);
15171619
switch (class) {
15181620
case FAN_CLASS_NOTIF:
15191621
group->priority = FSNOTIFY_PRIO_NORMAL;

include/linux/fsnotify_backend.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,8 @@ struct fsnotify_group {
255255
int f_flags; /* event_f_flags from fanotify_init() */
256256
struct ucounts *ucounts;
257257
mempool_t error_events_pool;
258+
/* chained on perm_group_list */
259+
struct list_head perm_grp_list;
258260
} fanotify_data;
259261
#endif /* CONFIG_FANOTIFY */
260262
};

0 commit comments

Comments
 (0)