Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion kernel/src/bpf/map/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,10 @@ pub fn bpf_map_create(attr: &bpf_attr) -> Result<usize> {
let bpf_map = BpfMap::new(map, map_meta);
let fd_table = ProcessManager::current_pcb().fd_table();
let file = File::new(Arc::new(bpf_map), FileFlags::O_RDWR | FileFlags::O_CLOEXEC)?;
let fd = fd_table.write().alloc_fd(file, None).map(|x| x as usize)?;
let fd = fd_table
.write()
.alloc_fd(file, None, true)
.map(|x| x as usize)?;
info!("create map with fd: [{}]", fd);
Ok(fd)
}
Expand Down
5 changes: 4 additions & 1 deletion kernel/src/bpf/prog/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,9 @@ pub fn bpf_prog_load(attr: &bpf_attr) -> Result<usize> {
let fd_table = ProcessManager::current_pcb().fd_table();
let prog = BpfProgVerifier::new(prog, log_info.log_level, &mut []).verify(&fd_table)?;
let file = File::new(Arc::new(prog), FileFlags::O_RDWR)?;
let fd = fd_table.write().alloc_fd(file, None).map(|x| x as usize)?;
let fd = fd_table
.write()
.alloc_fd(file, None, false)
.map(|x| x as usize)?;
Ok(fd)
}
3 changes: 2 additions & 1 deletion kernel/src/filesystem/epoll/event_poll.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,12 +92,13 @@ impl EventPoll {
/// - 成功则返回Ok(fd),否则返回Err
pub fn create_epoll(flags: FileFlags) -> Result<usize, SystemError> {
let ep_file = Self::create_epoll_file(flags)?;
let cloexec = flags.contains(FileFlags::O_CLOEXEC);

let current_pcb = ProcessManager::current_pcb();
let fd_table = current_pcb.fd_table();
let mut fd_table_guard = fd_table.write();

let fd = fd_table_guard.alloc_fd(ep_file, None)?;
let fd = fd_table_guard.alloc_fd(ep_file, None, cloexec)?;

Ok(fd as usize)
}
Expand Down
113 changes: 79 additions & 34 deletions kernel/src/filesystem/vfs/file.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use core::{
fmt,
sync::atomic::{AtomicBool, AtomicUsize, Ordering},
sync::atomic::{AtomicUsize, Ordering},
};

use alloc::{string::String, sync::Arc, vec::Vec};
Expand Down Expand Up @@ -395,8 +395,6 @@ pub struct File {
pub private_data: Mutex<FilePrivateData>,
/// 文件的凭证
cred: Arc<Cred>,
/// 文件描述符标志:是否在execve时关闭
close_on_exec: AtomicBool,
/// owner
pid: Mutex<Option<Arc<ProcessControlBlock>>>,
/// 预读状态
Expand Down Expand Up @@ -564,7 +562,8 @@ impl File {
flags.insert(FileFlags::O_APPEND);
}

let close_on_exec = flags.contains(FileFlags::O_CLOEXEC);
// O_CLOEXEC 是 per-fd 属性,由 alloc_fd/alloc_fd_arc 的 cloexec 参数控制,
// 不存储在 File.flags 中。
flags.remove(FileFlags::O_CLOEXEC);

let mut mode = FileMode::open_fmode(flags);
Expand Down Expand Up @@ -612,7 +611,6 @@ impl File {
readdir_subdirs_name: Mutex::new(Vec::default()),
private_data,
cred: ProcessManager::current_pcb().cred(),
close_on_exec: AtomicBool::new(close_on_exec),
pid: Mutex::new(None),
ra_state: Mutex::new(FileReadaheadState::new()),
};
Expand Down Expand Up @@ -1140,7 +1138,6 @@ impl File {
readdir_subdirs_name: Mutex::new(self.readdir_subdirs_name.lock().clone()),
private_data: Mutex::new(self.private_data.lock().clone()),
cred: self.cred.clone(),
close_on_exec: AtomicBool::new(self.close_on_exec.load(Ordering::SeqCst)),
pid: Mutex::new(None),
ra_state: Mutex::new(self.ra_state.lock().clone()),
};
Expand Down Expand Up @@ -1211,18 +1208,6 @@ impl File {
return *self.mode.read();
}

/// Reports whether this file is flagged to be closed on `execve`.
///
/// The flag is read from the `close_on_exec` atomic with sequentially
/// consistent ordering.
#[inline]
pub fn close_on_exec(&self) -> bool {
    self.close_on_exec.load(Ordering::SeqCst)
}

/// Sets or clears the flag that marks this file to be closed on `execve`.
///
/// The new value is stored into the `close_on_exec` atomic with sequentially
/// consistent ordering.
#[inline]
pub fn set_close_on_exec(&self, close_on_exec: bool) {
    let flag = &self.close_on_exec;
    flag.store(close_on_exec, Ordering::SeqCst);
}

pub fn set_flags(&self, mut new_flags: FileFlags) -> Result<(), SystemError> {
// todo: 是否需要调用inode的open方法,以更新private data(假如它与flags有关的话)?
// 也许需要加个更好的设计,让inode知晓文件的打开模式发生了变化,让它自己决定是否需要更新private data
Expand Down Expand Up @@ -1393,6 +1378,8 @@ impl Drop for File {
pub struct FileDescriptorVec {
/// 当前进程打开的文件描述符
fds: Vec<Option<Arc<File>>>,
/// per-fd 的 close_on_exec 标志(与 fds 并行,对应 Linux fdtable.close_on_exec 位图)
cloexec: Vec<bool>,
/// 下一个可能空闲的文件描述符号(用于优化分配,避免O(n²)扫描)
/// 类似于 Linux 的 fd_next_fd
next_fd: usize,
Expand All @@ -1413,9 +1400,13 @@ impl FileDescriptorVec {
let mut data = Vec::with_capacity(FileDescriptorVec::INITIAL_CAPACITY);
data.resize(FileDescriptorVec::INITIAL_CAPACITY, None);

let mut cloexec = Vec::with_capacity(FileDescriptorVec::INITIAL_CAPACITY);
cloexec.resize(FileDescriptorVec::INITIAL_CAPACITY, false);

// 初始化文件描述符数组结构体
return FileDescriptorVec {
fds: data,
cloexec,
next_fd: 0,
};
}
Expand All @@ -1431,6 +1422,7 @@ impl FileDescriptorVec {
for i in 0..self.fds.len() {
if let Some(file) = &self.fds[i] {
res.fds[i] = Some(file.clone());
res.cloexec[i] = self.cloexec[i];
}
}
// 复制 next_fd 以保持相同的分配状态
Expand Down Expand Up @@ -1460,19 +1452,28 @@ impl FileDescriptorVec {

let current_len = self.fds.len();
if new_capacity > current_len {
// 扩容:扩展向量并填充None
// 扩容:扩展向量并填充None/false
// 使用 try_reserve 先检查内存分配是否可能成功
if self.fds.try_reserve(new_capacity - current_len).is_err() {
return Err(SystemError::ENOMEM);
}
if self
.cloexec
.try_reserve(new_capacity - current_len)
.is_err()
{
return Err(SystemError::ENOMEM);
}
self.fds.resize(new_capacity, None);
self.cloexec.resize(new_capacity, false);
Comment on lines 1457 to 1468

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Reserve cloexec bitmap capacity before resizing

resize_to_capacity only checks allocation failure with self.fds.try_reserve(...), then unconditionally executes self.cloexec.resize(...). Under memory pressure this second allocation can fail outside the error path, causing a panic/abort instead of returning ENOMEM as intended by the surrounding logic.

Useful? React with 👍 / 👎.

} else if new_capacity < current_len {
// 缩容:允许,但不能丢弃仍在使用的高位fd。
// 若高位fd仍在使用,将缩容目标提升到 (最高已用fd + 1)。
let floor = self.highest_open_index().map(|idx| idx + 1).unwrap_or(0);
let target = core::cmp::max(new_capacity, floor);
if target < current_len {
self.fds.truncate(target);
self.cloexec.truncate(target);
// 确保 next_fd 不超过新的容量
if self.next_fd > target {
self.next_fd = target;
Expand Down Expand Up @@ -1509,12 +1510,36 @@ impl FileDescriptorVec {
///
/// - `file` 要存放的文件对象
/// - `fd` 如果为Some(i32),表示指定要申请这个文件描述符,如果这个文件描述符已经被使用,那么返回EBADF
/// - `cloexec` 是否设置 close_on_exec 标志(per-fd 属性)
///
/// ## 返回值
///
/// - `Ok(i32)` 申请成功,返回申请到的文件描述符
/// - `Err(SystemError)` 申请失败,返回错误码,并且,file对象将被drop掉
pub fn alloc_fd(&mut self, file: File, fd: Option<i32>) -> Result<i32, SystemError> {
pub fn alloc_fd(
    &mut self,
    file: File,
    fd: Option<i32>,
    cloexec: bool,
) -> Result<i32, SystemError> {
    // Wrap the owned file in a fresh `Arc`, then hand it to `alloc_fd_arc`,
    // which stores it in a descriptor slot and records the per-fd
    // close-on-exec flag.
    let shared = Arc::new(file);
    self.alloc_fd_arc(shared, fd, cloexec)
}

/// 申请文件描述符,并把已有的 Arc<File> 存入其中。
///
/// 用于 dup/dup2/dup3 等需要多个 fd 共享同一个 open file description 的场景。
///
/// ## 参数
///
/// - `file` 要存放的文件对象(Arc 引用)
/// - `fd` 如果为Some(i32),表示指定要申请这个文件描述符
/// - `cloexec` 是否设置 close_on_exec 标志(per-fd 属性)
pub fn alloc_fd_arc(
&mut self,
file: Arc<File>,
fd: Option<i32>,
cloexec: bool,
) -> Result<i32, SystemError> {
// 获取RLIMIT_NOFILE限制
let nofile_limit = crate::process::ProcessManager::current_pcb()
.get_rlimit(crate::process::resource::RLimitID::Nofile)
Expand All @@ -1533,7 +1558,8 @@ impl FileDescriptorVec {

let x = &mut self.fds[new_fd as usize];
if x.is_none() {
*x = Some(Arc::new(file));
*x = Some(file);
self.cloexec[new_fd as usize] = cloexec;
// 更新 next_fd:如果分配的是 next_fd 位置,则推进到下一个
if new_fd as usize == self.next_fd {
self.next_fd = new_fd as usize + 1;
Expand All @@ -1550,7 +1576,8 @@ impl FileDescriptorVec {
// 从 next_fd 开始查找空位
for i in self.next_fd..max_search {
if self.fds[i].is_none() {
self.fds[i] = Some(Arc::new(file));
self.fds[i] = Some(file);
self.cloexec[i] = cloexec;
// 更新 next_fd 为下一个位置
self.next_fd = i + 1;
return Ok(i as i32);
Expand All @@ -1570,7 +1597,8 @@ impl FileDescriptorVec {

// 扩容后,第一个新位置就是空的
let new_fd = current_len;
self.fds[new_fd] = Some(Arc::new(file));
self.fds[new_fd] = Some(file);
self.cloexec[new_fd] = cloexec;
// 更新 next_fd
self.next_fd = new_fd + 1;
return Ok(new_fd as i32);
Expand Down Expand Up @@ -1640,6 +1668,8 @@ impl FileDescriptorVec {

// 把文件描述符数组对应位置设置为空
let file = self.fds[fd as usize].take().unwrap();
// 清除 per-fd close_on_exec 标志
self.cloexec[fd as usize] = false;

// 更新 next_fd:如果释放的fd比当前next_fd小,则更新next_fd
// 这确保下次分配时可以复用较小的fd号,符合POSIX语义
Expand All @@ -1656,19 +1686,34 @@ impl FileDescriptorVec {
return FileDescriptorIterator::new(self);
}

/// Returns the per-fd close-on-exec flag recorded for `fd`.
///
/// Yields `false` when `fd` is rejected by `validate_fd`; the flag table is
/// only indexed after that check succeeds.
#[inline]
pub fn get_cloexec(&self, fd: i32) -> bool {
    self.validate_fd(fd) && self.cloexec[fd as usize]
}

/// Updates the per-fd close-on-exec flag for `fd` to `val`.
///
/// The call is a silent no-op when `fd` is rejected by `validate_fd`.
#[inline]
pub fn set_cloexec(&mut self, fd: i32, val: bool) {
    if !self.validate_fd(fd) {
        return;
    }
    self.cloexec[fd as usize] = val;
}

/// 在 execve 时关闭所有设置了 close_on_exec 的文件描述符
pub fn close_on_exec(&mut self) {
for i in 0..self.fds.len() {
if let Some(file) = &self.fds[i] {
let to_drop = file.close_on_exec();
if to_drop {
if let Err(r) = self.drop_fd(i as i32) {
error!(
"Failed to close file: pid = {:?}, fd = {}, error = {:?}",
ProcessManager::current_pcb().raw_pid(),
i,
r
);
}
if self.fds[i].is_some() && self.cloexec[i] {
if let Err(r) = self.drop_fd(i as i32) {
error!(
"Failed to close file: pid = {:?}, fd = {}, error = {:?}",
ProcessManager::current_pcb().raw_pid(),
i,
r
);
}
}
}
Expand Down
3 changes: 2 additions & 1 deletion kernel/src/filesystem/vfs/open.rs
Original file line number Diff line number Diff line change
Expand Up @@ -362,12 +362,13 @@ fn do_sys_openat2(dirfd: i32, path: &str, how: OpenHow) -> Result<usize, SystemE
inode.resize(0)?;
}
let file: File = File::new(inode, how.o_flags)?;
let cloexec = how.o_flags.contains(FileFlags::O_CLOEXEC);

// 把文件对象存入pcb
let r = ProcessManager::current_pcb()
.fd_table()
.write()
.alloc_fd(file, None)
.alloc_fd(file, None, cloexec)
.map(|fd| fd as usize);

return r;
Expand Down
42 changes: 19 additions & 23 deletions kernel/src/filesystem/vfs/syscall/dup2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,45 +19,41 @@ pub fn do_dup3(
flags: FileFlags,
fd_table_guard: &mut RwSemWriteGuard<'_, FileDescriptorVec>,
) -> Result<usize, SystemError> {
// 检查 RLIMIT_NOFILE:newfd 必须小于软限制
// 检查 RLIMIT_NOFILE:newfd 必须小于软限制(与 Linux ksys_dup3 一致,返回 EBADF)
let nofile = crate::process::ProcessManager::current_pcb()
.get_rlimit(crate::process::resource::RLimitID::Nofile)
.rlim_cur;
if newfd as u64 >= nofile {
return Err(SystemError::EMFILE);
}

// 确认oldfd, newfd是否有效
if !(fd_table_guard.validate_fd(oldfd) && fd_table_guard.validate_fd(newfd)) {
if newfd < 0 || newfd as u64 >= nofile {
return Err(SystemError::EBADF);
}

if oldfd == newfd {
// 若oldfd与newfd相等
// dup2(fd, fd) 语义:验证 oldfd 有效后原样返回(不修改 cloexec)
// 注意:dup3(fd, fd) 的 EINVAL 由 sys_dup3.rs 调用方在调用 do_dup3 之前处理
fd_table_guard
.get_file_by_fd(oldfd)
.ok_or(SystemError::EBADF)?;
return Ok(newfd as usize);
}
let new_exists = fd_table_guard.get_file_by_fd(newfd).is_some();
if new_exists {
// close newfd
if fd_table_guard.drop_fd(newfd).is_err() {
// An I/O error occurred while attempting to close fildes2.
return Err(SystemError::EIO);
}
}

// 验证 oldfd 有效(必须在当前 fd 表范围内且已打开)
// 注意:不需要验证 newfd 的范围,alloc_fd_arc 会自动扩容 fd 表
// (与 Linux ksys_dup3 中先调用 expand_files(files, newfd) 扩容一致)
let old_file = fd_table_guard
.get_file_by_fd(oldfd)
.ok_or(SystemError::EBADF)?;
let new_file = old_file.try_clone().ok_or(SystemError::EBADF)?;

if flags.contains(FileFlags::O_CLOEXEC) {
new_file.set_close_on_exec(true);
} else {
new_file.set_close_on_exec(false);
// 如果 newfd 已被占用,先关闭它
if fd_table_guard.get_file_by_fd(newfd).is_some() && fd_table_guard.drop_fd(newfd).is_err() {
// An I/O error occurred while attempting to close fildes2.
return Err(SystemError::EIO);
}
// 申请文件描述符,并把文件对象存入其中

let cloexec = flags.contains(FileFlags::O_CLOEXEC);

// 共享同一个 open file description(Arc<File>),符合 POSIX dup 语义
let res = fd_table_guard
.alloc_fd(new_file, Some(newfd))
.alloc_fd_arc(old_file, Some(newfd), cloexec)
.map(|x| x as usize);
return res;
}
9 changes: 4 additions & 5 deletions kernel/src/filesystem/vfs/syscall/sys_dup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,10 @@ impl Syscall for SysDupHandle {
.get_file_by_fd(oldfd)
.ok_or(SystemError::EBADF)?;

let new_file = old_file.try_clone().ok_or(SystemError::EBADF)?;
// dup默认非cloexec
new_file.set_close_on_exec(false);
// 申请文件描述符,并把文件对象存入其中
let res = fd_table_guard.alloc_fd(new_file, None).map(|x| x as usize);
// dup 共享同一个 open file description(Arc<File>),cloexec 默认 false
let res = fd_table_guard
.alloc_fd_arc(old_file, None, false)
.map(|x| x as usize);
return res;
}

Expand Down
5 changes: 4 additions & 1 deletion kernel/src/filesystem/vfs/syscall/sys_eventfd2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,12 @@ pub fn do_eventfd(init_val: u32, flags: u32) -> Result<usize, SystemError> {
} else {
FileFlags::O_RDWR
};
let cloexec = flags.contains(EventFdFlags::EFD_CLOEXEC);
let file = File::new(inode, filemode)?;
let binding = ProcessManager::current_pcb().fd_table();
let mut fd_table_guard = binding.write();
let fd = fd_table_guard.alloc_fd(file, None).map(|x| x as usize);
let fd = fd_table_guard
.alloc_fd(file, None, cloexec)
.map(|x| x as usize);
return fd;
}
Loading
Loading