Skip to content

Commit 28cd649

Browse files
committed
pidfs: ensure that PIDFS_INFO_EXIT is available
JIRA: https://issues.redhat.com/browse/RHEL-113598

commit 68db272
Author: Christian Brauner <brauner@kernel.org>
Date: Sun, 16 Mar 2025 13:49:09 +0100

pidfs: ensure that PIDFS_INFO_EXIT is available

When we currently create a pidfd we check that the task hasn't been reaped right before we create the pidfd. But it is of course possible that by the time we return the pidfd to userspace the task has already been reaped, since we don't check again after having created a dentry for it.

This was fine until now because that race was meaningless. But now that we provide PIDFD_INFO_EXIT it is a problem because it is possible that the kernel returns a reaped pidfd and it depends on the race whether PIDFD_INFO_EXIT information is available. This depends on whether the task gets reaped before or after a dentry has been attached to struct pid.

Make this consistent and only return pidfds for reaped tasks if PIDFD_INFO_EXIT information is available. This is done by performing another check whether the task has been reaped right after we attached a dentry to struct pid.

Since pidfs_exit() is called before struct pid's task linkage is removed, the case where the task got reaped but a dentry was already attached to struct pid and exit information was recorded and published can be handled correctly. In that case we do return a pidfd for a reaped task like we would've before.

Link: https://lore.kernel.org/r/20250316-kabel-fehden-66bdb6a83436@brauner
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Waiman Long <longman@redhat.com>
1 parent 25a0a76 commit 28cd649

File tree

3 files changed

+62
-5
lines changed

3 files changed

+62
-5
lines changed

fs/pidfs.c

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -763,8 +763,49 @@ static int pidfs_export_permission(struct handle_to_path_ctx *ctx,
763763
return 0;
764764
}
765765

766+
/*
* pidfs_pid_valid - decide whether a pidfd may be handed out for @pid
* @pid:   struct pid the pidfd would refer to
* @path:  pidfs path; the inode behind its dentry is checked for exit info
* @flags: pidfd flags; only PIDFD_CLONE and PIDFD_THREAD are inspected here
*
* Returns true when PIDFD_CLONE is set, when @pid still has a task of the
* selected type (PIDTYPE_PID for PIDFD_THREAD, PIDTYPE_TGID otherwise), or
* when the inode already carries non-NULL exit_info. Returns false otherwise.
*/
static inline bool pidfs_pid_valid(struct pid *pid, const struct path *path,
767+
unsigned int flags)
768+
{
769+
enum pid_type type;
770+
771+
if (flags & PIDFD_CLONE)
772+
return true;
773+
774+
/*
775+
* Make sure that if a pidfd is created PIDFD_INFO_EXIT
776+
* information will be available. So after an inode for the
777+
* pidfd has been allocated perform another check that the pid
778+
* is still alive. If it is, exit information is available even
779+
* if the task gets reaped before the pidfd is returned to
780+
* userspace. The only exception is PIDFD_CLONE where no task
781+
* linkage has been established for @pid yet and the kernel is
782+
* in the middle of process creation so there's nothing for
783+
* pidfs to miss.
784+
*/
785+
if (flags & PIDFD_THREAD)
786+
type = PIDTYPE_PID;
787+
else
788+
type = PIDTYPE_TGID;
789+
790+
/*
791+
* Since pidfs_exit() is called before struct pid's task linkage
792+
* is removed the case where the task got reaped but a dentry
793+
* was already attached to struct pid and exit information was
794+
* recorded and published can be handled correctly.
795+
*/
796+
if (unlikely(!pid_has_task(pid, type))) {
797+
struct inode *inode = d_inode(path->dentry);
798+
return !!READ_ONCE(pidfs_i(inode)->exit_info);
799+
}
800+
801+
return true;
802+
}
803+
766804
static struct file *pidfs_export_open(struct path *path, unsigned int oflags)
767805
{
806+
if (!pidfs_pid_valid(d_inode(path->dentry)->i_private, path, oflags))
807+
return ERR_PTR(-ESRCH);
808+
768809
/*
769810
* Clear O_LARGEFILE as open_by_handle_at() forces it and raise
770811
* O_RDWR as pidfds always are.
@@ -828,21 +869,30 @@ static struct file_system_type pidfs_type = {
828869

829870
struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags)
830871
{
831-
832872
struct file *pidfd_file;
833-
struct path path;
873+
struct path path __free(path_put) = {};
834874
int ret;
835875

876+
/*
877+
* Ensure that PIDFD_CLONE can be passed as a flag without
878+
* overloading other uapi pidfd flags.
879+
*/
880+
BUILD_BUG_ON(PIDFD_CLONE == PIDFD_THREAD);
881+
BUILD_BUG_ON(PIDFD_CLONE == PIDFD_NONBLOCK);
882+
836883
ret = path_from_stashed(&pid->stashed, pidfs_mnt, get_pid(pid), &path);
837884
if (ret < 0)
838885
return ERR_PTR(ret);
839886

887+
if (!pidfs_pid_valid(pid, &path, flags))
888+
return ERR_PTR(-ESRCH);
889+
890+
flags &= ~PIDFD_CLONE;
840891
pidfd_file = dentry_open(&path, flags, current_cred());
841892
/* Raise PIDFD_THREAD explicitly as do_dentry_open() strips it. */
842893
if (!IS_ERR(pidfd_file))
843894
pidfd_file->f_flags |= (flags & PIDFD_THREAD);
844895

845-
path_put(&path);
846896
return pidfd_file;
847897
}
848898

include/uapi/linux/pidfd.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@
1010
/* Flags for pidfd_open(). */
1111
#define PIDFD_NONBLOCK O_NONBLOCK
1212
#define PIDFD_THREAD O_EXCL
13+
#ifdef __KERNEL__
14+
#include <linux/sched.h>
15+
#define PIDFD_CLONE CLONE_PIDFD
16+
#endif
1317

1418
/* Flags for pidfd_send_signal(). */
1519
#define PIDFD_SIGNAL_THREAD (1UL << 0)

kernel/fork.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2401,8 +2401,11 @@ __latent_entropy struct task_struct *copy_process(
24012401
if (clone_flags & CLONE_PIDFD) {
24022402
int flags = (clone_flags & CLONE_THREAD) ? PIDFD_THREAD : 0;
24032403

2404-
/* Note that no task has been attached to @pid yet. */
2405-
retval = __pidfd_prepare(pid, flags, &pidfile);
2404+
/*
2405+
* Note that no task has been attached to @pid yet; indicate
2406+
* that via PIDFD_CLONE.
2407+
*/
2408+
retval = __pidfd_prepare(pid, flags | PIDFD_CLONE, &pidfile);
24062409
if (retval < 0)
24072410
goto bad_fork_free_pid;
24082411
pidfd = retval;

0 commit comments

Comments
 (0)