diff --git a/kernel/src/bpf/map/mod.rs b/kernel/src/bpf/map/mod.rs index d6e11f530e..a7720c7ebb 100644 --- a/kernel/src/bpf/map/mod.rs +++ b/kernel/src/bpf/map/mod.rs @@ -249,7 +249,10 @@ pub fn bpf_map_create(attr: &bpf_attr) -> Result { let bpf_map = BpfMap::new(map, map_meta); let fd_table = ProcessManager::current_pcb().fd_table(); let file = File::new(Arc::new(bpf_map), FileFlags::O_RDWR | FileFlags::O_CLOEXEC)?; - let fd = fd_table.write().alloc_fd(file, None).map(|x| x as usize)?; + let fd = fd_table + .write() + .alloc_fd(file, None, true) + .map(|x| x as usize)?; info!("create map with fd: [{}]", fd); Ok(fd) } diff --git a/kernel/src/bpf/prog/mod.rs b/kernel/src/bpf/prog/mod.rs index acc61c2578..3866c9dfa0 100644 --- a/kernel/src/bpf/prog/mod.rs +++ b/kernel/src/bpf/prog/mod.rs @@ -122,6 +122,9 @@ pub fn bpf_prog_load(attr: &bpf_attr) -> Result { let fd_table = ProcessManager::current_pcb().fd_table(); let prog = BpfProgVerifier::new(prog, log_info.log_level, &mut []).verify(&fd_table)?; let file = File::new(Arc::new(prog), FileFlags::O_RDWR)?; - let fd = fd_table.write().alloc_fd(file, None).map(|x| x as usize)?; + let fd = fd_table + .write() + .alloc_fd(file, None, false) + .map(|x| x as usize)?; Ok(fd) } diff --git a/kernel/src/filesystem/epoll/event_poll.rs b/kernel/src/filesystem/epoll/event_poll.rs index 19170581ca..4b9684e7e6 100644 --- a/kernel/src/filesystem/epoll/event_poll.rs +++ b/kernel/src/filesystem/epoll/event_poll.rs @@ -92,12 +92,13 @@ impl EventPoll { /// - 成功则返回Ok(fd),否则返回Err pub fn create_epoll(flags: FileFlags) -> Result { let ep_file = Self::create_epoll_file(flags)?; + let cloexec = flags.contains(FileFlags::O_CLOEXEC); let current_pcb = ProcessManager::current_pcb(); let fd_table = current_pcb.fd_table(); let mut fd_table_guard = fd_table.write(); - let fd = fd_table_guard.alloc_fd(ep_file, None)?; + let fd = fd_table_guard.alloc_fd(ep_file, None, cloexec)?; Ok(fd as usize) } diff --git a/kernel/src/filesystem/vfs/file.rs b/kernel/src/filesystem/vfs/file.rs index 70060f6c94..1dd009770c 100644 --- a/kernel/src/filesystem/vfs/file.rs +++ b/kernel/src/filesystem/vfs/file.rs @@ -1,6 +1,6 @@ use core::{ fmt, - sync::atomic::{AtomicBool, AtomicUsize, Ordering}, + sync::atomic::{AtomicUsize, Ordering}, }; use alloc::{string::String, sync::Arc, vec::Vec}; @@ -395,8 +395,6 @@ pub struct File { pub private_data: Mutex, /// 文件的凭证 cred: Arc, - /// 文件描述符标志:是否在execve时关闭 - close_on_exec: AtomicBool, /// owner pid: Mutex>>, /// 预读状态 @@ -564,7 +562,8 @@ impl File { flags.insert(FileFlags::O_APPEND); } - let close_on_exec = flags.contains(FileFlags::O_CLOEXEC); + // O_CLOEXEC 是 per-fd 属性,由 alloc_fd/alloc_fd_arc 的 cloexec 参数控制, + // 不存储在 File.flags 中。 flags.remove(FileFlags::O_CLOEXEC); let mut mode = FileMode::open_fmode(flags); @@ -612,7 +611,6 @@ impl File { readdir_subdirs_name: Mutex::new(Vec::default()), private_data, cred: ProcessManager::current_pcb().cred(), - close_on_exec: AtomicBool::new(close_on_exec), pid: Mutex::new(None), ra_state: Mutex::new(FileReadaheadState::new()), }; @@ -1140,7 +1138,6 @@ impl File { readdir_subdirs_name: Mutex::new(self.readdir_subdirs_name.lock().clone()), private_data: Mutex::new(self.private_data.lock().clone()), cred: self.cred.clone(), - close_on_exec: AtomicBool::new(self.close_on_exec.load(Ordering::SeqCst)), pid: Mutex::new(None), ra_state: Mutex::new(self.ra_state.lock().clone()), }; @@ -1211,18 +1208,6 @@ impl File { return *self.mode.read(); } - /// 获取文件是否在execve时关闭 - #[inline] - pub fn close_on_exec(&self) -> bool { - return self.close_on_exec.load(Ordering::SeqCst); - } - - /// 设置文件是否在execve时关闭 - #[inline] - pub fn set_close_on_exec(&self, close_on_exec: bool) { - self.close_on_exec.store(close_on_exec, Ordering::SeqCst); - } - pub fn set_flags(&self, mut new_flags: FileFlags) -> Result<(), SystemError> { // todo: 是否需要调用inode的open方法,以更新private data(假如它与flags有关的话)? // 也许需要加个更好的设计,让inode知晓文件的打开模式发生了变化,让它自己决定是否需要更新private data @@ -1393,6 +1378,8 @@ impl Drop for File { pub struct FileDescriptorVec { /// 当前进程打开的文件描述符 fds: Vec>>, + /// per-fd 的 close_on_exec 标志(与 fds 并行,对应 Linux fdtable.close_on_exec 位图) + cloexec: Vec, /// 下一个可能空闲的文件描述符号(用于优化分配,避免O(n²)扫描) /// 类似于 Linux 的 fd_next_fd next_fd: usize, @@ -1413,9 +1400,13 @@ impl FileDescriptorVec { let mut data = Vec::with_capacity(FileDescriptorVec::INITIAL_CAPACITY); data.resize(FileDescriptorVec::INITIAL_CAPACITY, None); + let mut cloexec = Vec::with_capacity(FileDescriptorVec::INITIAL_CAPACITY); + cloexec.resize(FileDescriptorVec::INITIAL_CAPACITY, false); + // 初始化文件描述符数组结构体 return FileDescriptorVec { fds: data, + cloexec, next_fd: 0, }; } @@ -1431,6 +1422,7 @@ impl FileDescriptorVec { for i in 0..self.fds.len() { if let Some(file) = &self.fds[i] { res.fds[i] = Some(file.clone()); + res.cloexec[i] = self.cloexec[i]; } } // 复制 next_fd 以保持相同的分配状态 @@ -1460,12 +1452,20 @@ impl FileDescriptorVec { let current_len = self.fds.len(); if new_capacity > current_len { - // 扩容:扩展向量并填充None + // 扩容:扩展向量并填充None/false // 使用 try_reserve 先检查内存分配是否可能成功 if self.fds.try_reserve(new_capacity - current_len).is_err() { return Err(SystemError::ENOMEM); } + if self + .cloexec + .try_reserve(new_capacity - current_len) + .is_err() + { + return Err(SystemError::ENOMEM); + } self.fds.resize(new_capacity, None); + self.cloexec.resize(new_capacity, false); } else if new_capacity < current_len { // 缩容:允许,但不能丢弃仍在使用的高位fd。 // 若高位fd仍在使用,将缩容目标提升到 (最高已用fd + 1)。 @@ -1473,6 +1473,7 @@ impl FileDescriptorVec { let target = core::cmp::max(new_capacity, floor); if target < current_len { self.fds.truncate(target); + self.cloexec.truncate(target); // 确保 next_fd 不超过新的容量 if self.next_fd > target { self.next_fd = target; @@ -1509,12 +1510,36 @@ impl FileDescriptorVec { /// /// - `file` 要存放的文件对象 /// - `fd` 如果为Some(i32),表示指定要申请这个文件描述符,如果这个文件描述符已经被使用,那么返回EBADF + /// - `cloexec` 是否设置 close_on_exec 标志(per-fd 属性) /// /// ## 返回值 /// /// - `Ok(i32)` 申请成功,返回申请到的文件描述符 /// - `Err(SystemError)` 申请失败,返回错误码,并且,file对象将被drop掉 - pub fn alloc_fd(&mut self, file: File, fd: Option) -> Result { + pub fn alloc_fd( + &mut self, + file: File, + fd: Option, + cloexec: bool, + ) -> Result { + self.alloc_fd_arc(Arc::new(file), fd, cloexec) + } + + /// 申请文件描述符,并把已有的 Arc 存入其中。 + /// + /// 用于 dup/dup2/dup3 等需要多个 fd 共享同一个 open file description 的场景。 + /// + /// ## 参数 + /// + /// - `file` 要存放的文件对象(Arc 引用) + /// - `fd` 如果为Some(i32),表示指定要申请这个文件描述符 + /// - `cloexec` 是否设置 close_on_exec 标志(per-fd 属性) + pub fn alloc_fd_arc( + &mut self, + file: Arc, + fd: Option, + cloexec: bool, + ) -> Result { // 获取RLIMIT_NOFILE限制 let nofile_limit = crate::process::ProcessManager::current_pcb() .get_rlimit(crate::process::resource::RLimitID::Nofile) @@ -1533,7 +1558,8 @@ impl FileDescriptorVec { let x = &mut self.fds[new_fd as usize]; if x.is_none() { - *x = Some(Arc::new(file)); + *x = Some(file); + self.cloexec[new_fd as usize] = cloexec; // 更新 next_fd:如果分配的是 next_fd 位置,则推进到下一个 if new_fd as usize == self.next_fd { self.next_fd = new_fd as usize + 1; @@ -1550,7 +1576,8 @@ impl FileDescriptorVec { // 从 next_fd 开始查找空位 for i in self.next_fd..max_search { if self.fds[i].is_none() { - self.fds[i] = Some(Arc::new(file)); + self.fds[i] = Some(file); + self.cloexec[i] = cloexec; // 更新 next_fd 为下一个位置 self.next_fd = i + 1; return Ok(i as i32); @@ -1570,7 +1597,8 @@ impl FileDescriptorVec { // 扩容后,第一个新位置就是空的 let new_fd = current_len; - self.fds[new_fd] = Some(Arc::new(file)); + self.fds[new_fd] = Some(file); + self.cloexec[new_fd] = cloexec; // 更新 next_fd self.next_fd = new_fd + 1; return Ok(new_fd as i32); @@ -1640,6 +1668,8 @@ impl FileDescriptorVec { // 把文件描述符数组对应位置设置为空 let file = self.fds[fd as usize].take().unwrap(); + // 清除 per-fd close_on_exec 标志 + self.cloexec[fd as usize] = false; // 更新 next_fd:如果释放的fd比当前next_fd小,则更新next_fd // 这确保下次分配时可以复用较小的fd号,符合POSIX语义 @@ -1656,19 +1686,34 @@ impl FileDescriptorVec { return FileDescriptorIterator::new(self); } + /// 获取指定 fd 的 close_on_exec 标志 + #[inline] + pub fn get_cloexec(&self, fd: i32) -> bool { + if !self.validate_fd(fd) { + return false; + } + self.cloexec[fd as usize] + } + + /// 设置指定 fd 的 close_on_exec 标志 + #[inline] + pub fn set_cloexec(&mut self, fd: i32, val: bool) { + if self.validate_fd(fd) { + self.cloexec[fd as usize] = val; + } + } + + /// 在 execve 时关闭所有设置了 close_on_exec 的文件描述符 pub fn close_on_exec(&mut self) { for i in 0..self.fds.len() { - if let Some(file) = &self.fds[i] { - let to_drop = file.close_on_exec(); - if to_drop { - if let Err(r) = self.drop_fd(i as i32) { - error!( - "Failed to close file: pid = {:?}, fd = {}, error = {:?}", - ProcessManager::current_pcb().raw_pid(), - i, - r - ); - } + if self.fds[i].is_some() && self.cloexec[i] { + if let Err(r) = self.drop_fd(i as i32) { + error!( + "Failed to close file: pid = {:?}, fd = {}, error = {:?}", + ProcessManager::current_pcb().raw_pid(), + i, + r + ); } } } diff --git a/kernel/src/filesystem/vfs/open.rs b/kernel/src/filesystem/vfs/open.rs index aff0981939..b11be1afc9 100644 --- a/kernel/src/filesystem/vfs/open.rs +++ b/kernel/src/filesystem/vfs/open.rs @@ -362,12 +362,13 @@ fn do_sys_openat2(dirfd: i32, path: &str, how: OpenHow) -> Result, ) -> Result { - // 检查 RLIMIT_NOFILE:newfd 必须小于软限制 + // 检查 RLIMIT_NOFILE:newfd 必须小于软限制(与 Linux ksys_dup3 一致,返回 EBADF) let nofile = crate::process::ProcessManager::current_pcb() .get_rlimit(crate::process::resource::RLimitID::Nofile) .rlim_cur; - if newfd as u64 >= nofile { - return Err(SystemError::EMFILE); - } - - // 确认oldfd, newid是否有效 - if !(fd_table_guard.validate_fd(oldfd) && fd_table_guard.validate_fd(newfd)) { + if newfd < 0 || newfd as u64 >= nofile { return Err(SystemError::EBADF); } if oldfd == newfd { - // 若oldfd与newfd相等 + // dup2(fd, fd) 语义:验证 oldfd 有效后原样返回(不修改 cloexec) + // 注意:dup3(fd, fd) 的 EINVAL 由 sys_dup3.rs 调用方在调用 do_dup3 之前处理 + fd_table_guard + .get_file_by_fd(oldfd) + .ok_or(SystemError::EBADF)?; return Ok(newfd as usize); } - let new_exists = fd_table_guard.get_file_by_fd(newfd).is_some(); - if new_exists { - // close newfd - if fd_table_guard.drop_fd(newfd).is_err() { - // An I/O error occurred while attempting to close fildes2. - return Err(SystemError::EIO); - } - } + // 验证 oldfd 有效(必须在当前 fd 表范围内且已打开) + // 注意:不需要验证 newfd 的范围,alloc_fd_arc 会自动扩容 fd 表 + // (与 Linux ksys_dup3 中先调用 expand_files(files, newfd) 扩容一致) let old_file = fd_table_guard .get_file_by_fd(oldfd) .ok_or(SystemError::EBADF)?; - let new_file = old_file.try_clone().ok_or(SystemError::EBADF)?; - if flags.contains(FileFlags::O_CLOEXEC) { - new_file.set_close_on_exec(true); - } else { - new_file.set_close_on_exec(false); + // 如果 newfd 已被占用,先关闭它 + if fd_table_guard.get_file_by_fd(newfd).is_some() && fd_table_guard.drop_fd(newfd).is_err() { + // An I/O error occurred while attempting to close fildes2. + return Err(SystemError::EIO); } - // 申请文件描述符,并把文件对象存入其中 + + let cloexec = flags.contains(FileFlags::O_CLOEXEC); + + // 共享同一个 open file description(Arc),符合 POSIX dup 语义 let res = fd_table_guard - .alloc_fd(new_file, Some(newfd)) + .alloc_fd_arc(old_file, Some(newfd), cloexec) .map(|x| x as usize); return res; } diff --git a/kernel/src/filesystem/vfs/syscall/sys_dup.rs b/kernel/src/filesystem/vfs/syscall/sys_dup.rs index f499049bdc..227a4f2565 100644 --- a/kernel/src/filesystem/vfs/syscall/sys_dup.rs +++ b/kernel/src/filesystem/vfs/syscall/sys_dup.rs @@ -26,11 +26,10 @@ impl Syscall for SysDupHandle { .get_file_by_fd(oldfd) .ok_or(SystemError::EBADF)?; - let new_file = old_file.try_clone().ok_or(SystemError::EBADF)?; - // dup默认非cloexec - new_file.set_close_on_exec(false); - // 申请文件描述符,并把文件对象存入其中 - let res = fd_table_guard.alloc_fd(new_file, None).map(|x| x as usize); + // dup 共享同一个 open file description(Arc),cloexec 默认 false + let res = fd_table_guard + .alloc_fd_arc(old_file, None, false) + .map(|x| x as usize); return res; } diff --git a/kernel/src/filesystem/vfs/syscall/sys_eventfd2.rs b/kernel/src/filesystem/vfs/syscall/sys_eventfd2.rs index 9f0e2ddb27..73b2d80a50 100644 --- a/kernel/src/filesystem/vfs/syscall/sys_eventfd2.rs +++ b/kernel/src/filesystem/vfs/syscall/sys_eventfd2.rs @@ -94,9 +94,12 @@ pub fn do_eventfd(init_val: u32, flags: u32) -> Result { } else { FileFlags::O_RDWR }; + let cloexec = flags.contains(EventFdFlags::EFD_CLOEXEC); let file = File::new(inode, filemode)?; let binding = ProcessManager::current_pcb().fd_table(); let mut fd_table_guard = binding.write(); - let fd = fd_table_guard.alloc_fd(file, None).map(|x| x as usize); + let fd = fd_table_guard + .alloc_fd(file, None, cloexec) + .map(|x| x as usize); return fd; } diff --git a/kernel/src/filesystem/vfs/syscall/sys_fcntl.rs b/kernel/src/filesystem/vfs/syscall/sys_fcntl.rs index 3a5949ac4c..17c44d1fff 100644 --- a/kernel/src/filesystem/vfs/syscall/sys_fcntl.rs +++ b/kernel/src/filesystem/vfs/syscall/sys_fcntl.rs @@ -111,15 +111,12 @@ impl SysFcntlHandle { return Err(SystemError::EMFILE); } FcntlCommand::GetFd => { - // Get file descriptor flags. + // Get file descriptor flags (close_on_exec is per-fd). let binding = ProcessManager::current_pcb().fd_table(); let fd_table_guard = binding.read(); - if let Some(file) = fd_table_guard.get_file_by_fd(fd) { - // drop guard 以避免无法调度的问题 - drop(fd_table_guard); - - if file.close_on_exec() { + if fd_table_guard.get_file_by_fd(fd).is_some() { + if fd_table_guard.get_cloexec(fd) { return Ok(FD_CLOEXEC as usize); } else { return Ok(0); @@ -128,19 +125,13 @@ impl SysFcntlHandle { return Err(SystemError::EBADF); } FcntlCommand::SetFd => { - // Set file descriptor flags. + // Set file descriptor flags (close_on_exec is per-fd). let binding = ProcessManager::current_pcb().fd_table(); - let fd_table_guard = binding.write(); + let mut fd_table_guard = binding.write(); - if let Some(file) = fd_table_guard.get_file_by_fd(fd) { - // drop guard 以避免无法调度的问题 - drop(fd_table_guard); + if fd_table_guard.get_file_by_fd(fd).is_some() { let arg = arg as u32; - if arg & FD_CLOEXEC != 0 { - file.set_close_on_exec(true); - } else { - file.set_close_on_exec(false); - } + fd_table_guard.set_cloexec(fd, arg & FD_CLOEXEC != 0); return Ok(0); } return Err(SystemError::EBADF); diff --git a/kernel/src/filesystem/vfs/syscall/sys_ioctl.rs b/kernel/src/filesystem/vfs/syscall/sys_ioctl.rs index 403aa95705..3009ee33e6 100644 --- a/kernel/src/filesystem/vfs/syscall/sys_ioctl.rs +++ b/kernel/src/filesystem/vfs/syscall/sys_ioctl.rs @@ -71,12 +71,11 @@ impl Syscall for SysIoctlHandle { FIONBIO => { return Self::handle_fionbio(&file, data); } - FIONCLEX => { - file.set_close_on_exec(false); - return Ok(0); - } - FIOCLEX => { - file.set_close_on_exec(true); + FIONCLEX | FIOCLEX => { + // close_on_exec 是 per-fd 属性,需要通过 fd 表设置 + let binding = ProcessManager::current_pcb().fd_table(); + let mut fd_table_guard = binding.write(); + fd_table_guard.set_cloexec(fd as i32, cmd == FIOCLEX); return Ok(0); } FIOASYNC => { diff --git a/kernel/src/ipc/syscall/sys_pipe2.rs b/kernel/src/ipc/syscall/sys_pipe2.rs index 100be88465..40deb2365b 100644 --- a/kernel/src/ipc/syscall/sys_pipe2.rs +++ b/kernel/src/ipc/syscall/sys_pipe2.rs @@ -38,14 +38,11 @@ pub(super) fn do_kernel_pipe2(fd: *mut i32, flags: FileFlags) -> Result = match args.type_ { // Kprobe @@ -409,7 +410,10 @@ pub fn perf_event_open( } let file = File::new(perf_event, file_mode)?; let fd_table = ProcessManager::current_pcb().fd_table(); - let fd = fd_table.write().alloc_fd(file, None).map(|x| x as usize)?; + let fd = fd_table + .write() + .alloc_fd(file, None, cloexec) + .map(|x| x as usize)?; Ok(fd) } diff --git a/kernel/src/process/fork.rs b/kernel/src/process/fork.rs index bbc8ec9a6a..bcfa91a1a1 100644 --- a/kernel/src/process/fork.rs +++ b/kernel/src/process/fork.rs @@ -725,7 +725,7 @@ impl ProcessManager { let r = current_pcb .fd_table() .write() - .alloc_fd(file, None) + .alloc_fd(file, None, true) .map(|fd| fd as usize); let mut writer = UserBufferWriter::new( diff --git a/kernel/src/process/stdio.rs b/kernel/src/process/stdio.rs index 2facb86ea0..45359c09d4 100644 --- a/kernel/src/process/stdio.rs +++ b/kernel/src/process/stdio.rs @@ -36,7 +36,7 @@ pub fn stdio_init() -> Result<(), SystemError> { ProcessManager::current_pcb() .fd_table() .write() - .alloc_fd(stdin, None) + .alloc_fd(stdin, None, false) .unwrap(), 0 ); @@ -44,7 +44,7 @@ pub fn stdio_init() -> Result<(), SystemError> { ProcessManager::current_pcb() .fd_table() .write() - .alloc_fd(stdout, None) + .alloc_fd(stdout, None, false) .unwrap(), 1 ); @@ -52,7 +52,7 @@ pub fn stdio_init() -> Result<(), SystemError> { ProcessManager::current_pcb() .fd_table() .write() - .alloc_fd(stderr, None) + .alloc_fd(stderr, None, false) .unwrap(), 2 ); diff --git a/kernel/src/process/syscall/sys_pidfdopen.rs b/kernel/src/process/syscall/sys_pidfdopen.rs index 310f5f3d32..1601865e21 100644 --- a/kernel/src/process/syscall/sys_pidfdopen.rs +++ b/kernel/src/process/syscall/sys_pidfdopen.rs @@ -61,10 +61,12 @@ impl Syscall for SysPidFdOpen { } // 存入pcb + // Linux 的 __pidfd_prepare() 无条件对 pidfd 设置 O_CLOEXEC, + // 无论用户传入什么 flags,pidfd 始终是 close-on-exec 的。 ProcessManager::current_pcb() .fd_table() .write() - .alloc_fd(file, None) + .alloc_fd(file, None, true) .map(|fd| fd as usize) } diff --git a/kernel/src/virt/kvm/kvm_dev.rs b/kernel/src/virt/kvm/kvm_dev.rs index d4df5f1281..10e14f5064 100644 --- a/kernel/src/virt/kvm/kvm_dev.rs +++ b/kernel/src/virt/kvm/kvm_dev.rs @@ -194,7 +194,7 @@ pub fn kvm_dev_ioctl_create_vm(_vmtype: usize) -> Result { let r = ProcessManager::current_pcb() .fd_table() .write() - .alloc_fd(file, None) + .alloc_fd(file, None, false) .map(|fd| fd as usize); return r; } diff --git a/kernel/src/virt/kvm/vm_dev.rs b/kernel/src/virt/kvm/vm_dev.rs index ea17d11498..6eaa446540 100644 --- a/kernel/src/virt/kvm/vm_dev.rs +++ b/kernel/src/virt/kvm/vm_dev.rs @@ -229,7 +229,7 @@ fn kvm_vm_ioctl_create_vcpu(id: u32) -> Result { let r = ProcessManager::current_pcb() .fd_table() .write() - .alloc_fd(file, None) + .alloc_fd(file, None, false) .map(|fd| fd as usize); return r; } diff --git a/kernel/src/virt/vm/kvm_dev.rs b/kernel/src/virt/vm/kvm_dev.rs index 2c0e2aa118..b72122a54e 100644 --- a/kernel/src/virt/vm/kvm_dev.rs +++ b/kernel/src/virt/vm/kvm_dev.rs @@ -89,7 +89,7 @@ impl LockedKvmInode { let current = ProcessManager::current_pcb(); let file = File::new(instance, FileFlags::O_RDWR)?; - let fd = current.fd_table().write().alloc_fd(file, None)?; + let fd = current.fd_table().write().alloc_fd(file, None, false)?; return Ok(fd as usize); } } diff --git a/kernel/src/virt/vm/kvm_host/mod.rs b/kernel/src/virt/vm/kvm_host/mod.rs index 0a18cce48f..ccee2c2d6a 100644 --- a/kernel/src/virt/vm/kvm_host/mod.rs +++ b/kernel/src/virt/vm/kvm_host/mod.rs @@ -178,7 +178,7 @@ impl Vm { let fd = ProcessManager::current_pcb() .fd_table() .write() - .alloc_fd(file, None)?; + .alloc_fd(file, None, false)?; Ok(fd as usize) } diff --git a/user/apps/c_unitest/test_dup_shared_fd.c b/user/apps/c_unitest/test_dup_shared_fd.c new file mode 100644 index 0000000000..b464aa9a5e --- /dev/null +++ b/user/apps/c_unitest/test_dup_shared_fd.c @@ -0,0 +1,444 @@ +/** + * test_dup_shared_fd.c - 验证 dup/dup2/dup3 共享 open file description 语义 + * + * POSIX 规定 dup 创建的新 fd 与旧 fd 共享同一个 open file description, + * 意味着它们共享文件偏移量和文件状态标志,但 close_on_exec 是 per-fd 独立的。 + * + * 本测试覆盖: + * 1. dup'd fd 共享文件偏移量(lseek 联动) + * 2. dup'd fd 共享文件状态标志(O_APPEND) + * 3. dup3(O_CLOEXEC) 只影响新 fd 的 close_on_exec + * 4. dup2 覆盖已有 fd 后共享 offset + * 5. 独立 open 的 fd 不共享 offset + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include + +static int test_count = 0; +static int fail_count = 0; + +#define TEST_ASSERT(cond, msg) \ + do { \ + test_count++; \ + if (!(cond)) { \ + fprintf(stderr, "FAIL [%d]: %s (line %d)\n", test_count, msg, \ + __LINE__); \ + fail_count++; \ + } else { \ + printf("PASS [%d]: %s\n", test_count, msg); \ + } \ + } while (0) + +#define TEST_FILE "/tmp/test_dup_shared_fd.tmp" + +static void cleanup(void) { unlink(TEST_FILE); } + +/** + * 测试 1: dup'd fd 共享文件偏移量 + * + * 对应 gvisor LseekTest::EtcPasswdDup: + * fd1 = open(file) + * fd2 = dup(fd1) + * lseek(fd1, 1000, SEEK_SET) + * assert(lseek(fd2, 0, SEEK_CUR) == 1000) // fd2 看到 fd1 的偏移 + */ +static void test_dup_shared_offset(void) { + printf("\n--- test_dup_shared_offset ---\n"); + + int fd1 = open(TEST_FILE, O_RDWR | O_CREAT | O_TRUNC, 0644); + TEST_ASSERT(fd1 >= 0, "open test file"); + + /* 写入一些数据 */ + char buf[2048]; + memset(buf, 'A', sizeof(buf)); + ssize_t nw = write(fd1, buf, sizeof(buf)); + TEST_ASSERT(nw == sizeof(buf), "write 2048 bytes"); + + /* 回到开头 */ + off_t pos = lseek(fd1, 0, SEEK_SET); + TEST_ASSERT(pos == 0, "lseek fd1 to 0"); + + /* dup */ + int fd2 = dup(fd1); + TEST_ASSERT(fd2 >= 0, "dup(fd1)"); + + /* 两个 fd 都在 offset 0 */ + pos = lseek(fd1, 0, SEEK_CUR); + TEST_ASSERT(pos == 0, "fd1 at offset 0"); + pos = lseek(fd2, 0, SEEK_CUR); + TEST_ASSERT(pos == 0, "fd2 at offset 0 (shared)"); + + /* 通过 fd1 seek 到 1000 */ + pos = lseek(fd1, 1000, SEEK_SET); + TEST_ASSERT(pos == 1000, "lseek fd1 to 1000"); + + /* fd2 应该也在 1000 — 这是 dup 共享 offset 的核心语义 */ + pos = lseek(fd2, 0, SEEK_CUR); + TEST_ASSERT(pos == 1000, "fd2 also at 1000 after fd1 seek (shared offset)"); + + /* 再 dup 一个 fd3,也应该在 1000 */ + int fd3 = dup(fd1); + TEST_ASSERT(fd3 >= 0, "dup(fd1) -> fd3"); + pos = lseek(fd3, 0, SEEK_CUR); + TEST_ASSERT(pos == 1000, "fd3 also at 1000 (shared offset)"); + + close(fd3); + close(fd2); + close(fd1); +} + +/** + * 测试 2: 独立 open 的 fd 不共享 offset + */ +static void test_independent_open_no_share(void) { + printf("\n--- test_independent_open_no_share ---\n"); + + int fd1 = open(TEST_FILE, O_RDWR | O_CREAT | O_TRUNC, 0644); + TEST_ASSERT(fd1 >= 0, "open fd1"); + char buf[1024]; + memset(buf, 'B', sizeof(buf)); + (void)write(fd1, buf, sizeof(buf)); + + int fd2 = open(TEST_FILE, O_RDONLY); + TEST_ASSERT(fd2 >= 0, "open fd2 independently"); + + lseek(fd1, 500, SEEK_SET); + off_t pos = lseek(fd2, 0, SEEK_CUR); + TEST_ASSERT(pos == 0, + "fd2 at 0, not affected by fd1 seek (independent open)"); + + close(fd2); + close(fd1); +} + +/** + * 测试 3: dup'd fd 共享文件状态标志 (O_APPEND) + * + * 通过 fcntl(F_SETFL) 在一个 fd 上设置 O_APPEND, + * dup'd fd 也应该能看到 O_APPEND(因为它们共享同一个 open file description)。 + */ +static void test_dup_shared_flags(void) { + printf("\n--- test_dup_shared_flags ---\n"); + + int fd1 = open(TEST_FILE, O_RDWR | O_CREAT | O_TRUNC, 0644); + TEST_ASSERT(fd1 >= 0, "open test file"); + + int fd2 = dup(fd1); + TEST_ASSERT(fd2 >= 0, "dup(fd1)"); + + /* 确认初始没有 O_APPEND */ + int flags1 = fcntl(fd1, F_GETFL); + TEST_ASSERT(!(flags1 & O_APPEND), "fd1 initially no O_APPEND"); + int flags2 = fcntl(fd2, F_GETFL); + TEST_ASSERT(!(flags2 & O_APPEND), "fd2 initially no O_APPEND"); + + /* 通过 fd1 设置 O_APPEND */ + int ret = fcntl(fd1, F_SETFL, flags1 | O_APPEND); + TEST_ASSERT(ret == 0, "fcntl F_SETFL O_APPEND on fd1"); + + /* fd2 也应该看到 O_APPEND — 因为共享同一个 File */ + flags2 = fcntl(fd2, F_GETFL); + TEST_ASSERT(flags2 & O_APPEND, + "fd2 sees O_APPEND after fd1 set it (shared flags)"); + + close(fd2); + close(fd1); +} + +/** + * 测试 4: close_on_exec 是 per-fd 独立的 + * + * dup() 默认不设置 cloexec。 + * dup3(fd, newfd, O_CLOEXEC) 只对 newfd 设置 cloexec,不影响 oldfd。 + * fcntl(F_SETFD, FD_CLOEXEC) 也只影响指定的 fd。 + */ +static void test_cloexec_per_fd(void) { + printf("\n--- test_cloexec_per_fd ---\n"); + + int fd1 = open(TEST_FILE, O_RDWR | O_CREAT | O_TRUNC, 0644); + TEST_ASSERT(fd1 >= 0, "open test file"); + + /* dup() 默认不设置 cloexec */ + int fd2 = dup(fd1); + TEST_ASSERT(fd2 >= 0, "dup(fd1)"); + + int cloexec1 = fcntl(fd1, F_GETFD); + int cloexec2 = fcntl(fd2, F_GETFD); + TEST_ASSERT(!(cloexec1 & FD_CLOEXEC), "fd1 no cloexec"); + TEST_ASSERT(!(cloexec2 & FD_CLOEXEC), "fd2 no cloexec (dup default)"); + + /* 通过 fcntl 在 fd1 设置 cloexec */ + fcntl(fd1, F_SETFD, FD_CLOEXEC); + cloexec1 = fcntl(fd1, F_GETFD); + cloexec2 = fcntl(fd2, F_GETFD); + TEST_ASSERT(cloexec1 & FD_CLOEXEC, "fd1 has cloexec after F_SETFD"); + TEST_ASSERT(!(cloexec2 & FD_CLOEXEC), + "fd2 still no cloexec (per-fd independent)"); + + close(fd2); + + /* dup3 with O_CLOEXEC */ + int fd3 = 100; /* 使用一个高 fd 号 */ + int ret = dup3(fd1, fd3, O_CLOEXEC); + TEST_ASSERT(ret == fd3, "dup3(fd1, 100, O_CLOEXEC)"); + + int cloexec3 = fcntl(fd3, F_GETFD); + cloexec1 = fcntl(fd1, F_GETFD); + TEST_ASSERT(cloexec3 & FD_CLOEXEC, "fd3 has cloexec (dup3 O_CLOEXEC)"); + TEST_ASSERT(cloexec1 & FD_CLOEXEC, + "fd1 cloexec unchanged (was set earlier)"); + + /* 清除 fd1 的 cloexec,不影响 fd3 */ + fcntl(fd1, F_SETFD, 0); + cloexec1 = fcntl(fd1, F_GETFD); + cloexec3 = fcntl(fd3, F_GETFD); + TEST_ASSERT(!(cloexec1 & FD_CLOEXEC), "fd1 cloexec cleared"); + TEST_ASSERT(cloexec3 & FD_CLOEXEC, + "fd3 cloexec unchanged (per-fd independent)"); + + close(fd3); + close(fd1); +} + +/** + * 测试 5: dup2 覆盖已有 fd 后共享 offset + */ +static void test_dup2_shared_offset(void) { + printf("\n--- test_dup2_shared_offset ---\n"); + + int fd1 = open(TEST_FILE, O_RDWR | O_CREAT | O_TRUNC, 0644); + TEST_ASSERT(fd1 >= 0, "open test file"); + char buf[2048]; + memset(buf, 'C', sizeof(buf)); + (void)write(fd1, buf, sizeof(buf)); + lseek(fd1, 500, SEEK_SET); + + /* 打开另一个 fd 用于被 dup2 覆盖 */ + int fd2 = open(TEST_FILE, O_RDONLY); + TEST_ASSERT(fd2 >= 0, "open fd2"); + + /* fd2 独立打开,offset 为 0 */ + off_t pos = lseek(fd2, 0, SEEK_CUR); + TEST_ASSERT(pos == 0, "fd2 at 0 before dup2"); + + /* dup2(fd1, fd2):关闭旧 fd2,让 fd2 共享 fd1 的 File */ + int ret = dup2(fd1, fd2); + TEST_ASSERT(ret == fd2, "dup2(fd1, fd2) returns fd2"); + + /* 现在 fd2 应该和 fd1 共享 offset (500) */ + pos = lseek(fd2, 0, SEEK_CUR); + TEST_ASSERT(pos == 500, "fd2 at 500 after dup2 (shared with fd1)"); + + /* 通过 fd2 seek,fd1 也应该同步 */ + lseek(fd2, 1000, SEEK_SET); + pos = lseek(fd1, 0, SEEK_CUR); + TEST_ASSERT(pos == 1000, "fd1 at 1000 after fd2 seek (shared offset)"); + + close(fd2); + close(fd1); +} + +/** + * 测试 6: read 通过 dup'd fd 共享偏移量 + */ +static void test_dup_read_advances_shared_offset(void) { + printf("\n--- test_dup_read_advances_shared_offset ---\n"); + + int fd1 = open(TEST_FILE, O_RDWR | O_CREAT | O_TRUNC, 0644); + TEST_ASSERT(fd1 >= 0, "open test file"); + + /* 写入 100 字节 */ + char wbuf[100]; + memset(wbuf, 'D', sizeof(wbuf)); + (void)write(fd1, wbuf, sizeof(wbuf)); + lseek(fd1, 0, SEEK_SET); + + int fd2 = dup(fd1); + TEST_ASSERT(fd2 >= 0, "dup(fd1)"); + + /* 通过 fd1 读 30 字节 */ + char rbuf[30]; + ssize_t nr = read(fd1, rbuf, sizeof(rbuf)); + TEST_ASSERT(nr == 30, "read 30 bytes via fd1"); + + /* fd2 的偏移量应该也是 30 */ + off_t pos = lseek(fd2, 0, SEEK_CUR); + TEST_ASSERT(pos == 30, "fd2 at 30 after fd1 read (shared offset)"); + + /* 通过 fd2 再读 20 字节 */ + char rbuf2[20]; + nr = read(fd2, rbuf2, sizeof(rbuf2)); + TEST_ASSERT(nr == 20, "read 20 bytes via fd2"); + + /* fd1 的偏移量应该是 50 */ + pos = lseek(fd1, 0, SEEK_CUR); + TEST_ASSERT(pos == 50, "fd1 at 50 after fd2 read (shared offset)"); + + close(fd2); + close(fd1); +} + +/** + * 测试 7: dup2(oldfd, oldfd) 返回 oldfd 且不做任何改变 + */ +static void test_dup2_same_fd(void) { + printf("\n--- test_dup2_same_fd ---\n"); + + int fd = open(TEST_FILE, O_RDWR | O_CREAT | O_TRUNC, 0644); + TEST_ASSERT(fd >= 0, "open test file"); + + int ret = dup2(fd, fd); + TEST_ASSERT(ret == fd, "dup2(fd, fd) returns fd"); + + /* fd 仍然有效 */ + int flags = fcntl(fd, F_GETFL); + TEST_ASSERT(flags >= 0, "fd still valid after dup2(fd, fd)"); + + close(fd); +} + +/** + * 测试 8: dup2 到高位 fd(超出初始 fd 表大小) + * + * Linux 的 ksys_dup3 调用 expand_files(files, newfd) 自动扩容 fd 表, + * 只要 newfd < RLIMIT_NOFILE 就是合法的。 + * 验证 BUG-3 修复:之前 validate_fd(newfd) 阻止了高位 fd。 + */ +static void test_dup2_high_fd(void) { + printf("\n--- test_dup2_high_fd ---\n"); + + int fd1 = open(TEST_FILE, O_RDWR | O_CREAT | O_TRUNC, 0644); + TEST_ASSERT(fd1 >= 0, "open test file"); + + /* 写入一些数据 */ + char buf[64]; + memset(buf, 'E', sizeof(buf)); + (void)write(fd1, buf, sizeof(buf)); + lseek(fd1, 42, SEEK_SET); + + /* dup2 到一个高位 fd(超出默认 fd 表大小 1024) */ + int high_fd = 1500; + int ret = dup2(fd1, high_fd); + TEST_ASSERT(ret == high_fd, "dup2(fd1, 1500) succeeds"); + + /* 验证高位 fd 和原 fd 共享 offset */ + off_t pos = lseek(high_fd, 0, SEEK_CUR); + TEST_ASSERT(pos == 42, "high_fd at 42 (shared offset with fd1)"); + + /* 通过高位 fd seek,原 fd 也应该同步 */ + lseek(high_fd, 99, SEEK_SET); + pos = lseek(fd1, 0, SEEK_CUR); + TEST_ASSERT(pos == 99, "fd1 at 99 after high_fd seek (shared offset)"); + + /* 验证高位 fd 的 cloexec 默认为 false(dup2 语义) */ + int cloexec = fcntl(high_fd, F_GETFD); + TEST_ASSERT(!(cloexec & FD_CLOEXEC), "high_fd no cloexec (dup2 default)"); + + close(high_fd); + close(fd1); +} + +/** + * 测试 9: dup3(fd, fd, 0) 返回 EINVAL + * + * POSIX/Linux 规定 dup3 在 oldfd == newfd 时必须返回 EINVAL, + * 与 dup2(fd, fd) 的 no-op 语义不同。 + */ +static void test_dup3_same_fd_einval(void) { + printf("\n--- test_dup3_same_fd_einval ---\n"); + + int fd = open(TEST_FILE, O_RDWR | O_CREAT | O_TRUNC, 0644); + TEST_ASSERT(fd >= 0, "open test file"); + + int ret = dup3(fd, fd, 0); + TEST_ASSERT(ret == -1, "dup3(fd, fd, 0) returns -1"); + TEST_ASSERT(errno == EINVAL, "dup3(fd, fd, 0) sets errno to EINVAL"); + + /* 同样带 O_CLOEXEC 的情况 */ + ret = dup3(fd, fd, O_CLOEXEC); + TEST_ASSERT(ret == -1, "dup3(fd, fd, O_CLOEXEC) returns -1"); + TEST_ASSERT(errno == EINVAL, + "dup3(fd, fd, O_CLOEXEC) sets errno to EINVAL"); + + /* fd 仍然有效 */ + int flags = fcntl(fd, F_GETFL); + TEST_ASSERT(flags >= 0, "fd still valid after failed dup3"); + + close(fd); +} + +/** + * 测试 10: 关闭原 fd 后 dup'd fd 仍然有效(引用计数正确) + * + * dup 共享 Arc,关闭原 fd 只是减少引用计数, + * dup'd fd 仍然持有引用,应该可以正常使用。 + */ +static void test_dup_close_original(void) { + printf("\n--- test_dup_close_original ---\n"); + + int fd1 = open(TEST_FILE, O_RDWR | O_CREAT | O_TRUNC, 0644); + TEST_ASSERT(fd1 >= 0, "open test file"); + + /* 写入数据 */ + const char *msg = "Hello, dup refcount!"; + ssize_t nw = write(fd1, msg, strlen(msg)); + TEST_ASSERT(nw == (ssize_t)strlen(msg), "write message"); + + /* dup fd1 -> fd2 */ + int fd2 = dup(fd1); + TEST_ASSERT(fd2 >= 0, "dup(fd1)"); + + /* 关闭原 fd */ + close(fd1); + + /* fd2 仍然有效:可以 seek 和 read */ + off_t pos = lseek(fd2, 0, SEEK_SET); + TEST_ASSERT(pos == 0, "lseek fd2 to 0 after closing fd1"); + + char rbuf[64]; + memset(rbuf, 0, sizeof(rbuf)); + ssize_t nr = read(fd2, rbuf, sizeof(rbuf)); + TEST_ASSERT(nr == (ssize_t)strlen(msg), "read from fd2 after closing fd1"); + TEST_ASSERT(memcmp(rbuf, msg, strlen(msg)) == 0, + "fd2 reads correct data after fd1 closed"); + + /* 通过 fd2 写入更多数据 */ + const char *msg2 = " Still works!"; + nw = write(fd2, msg2, strlen(msg2)); + TEST_ASSERT(nw == (ssize_t)strlen(msg2), + "write via fd2 after fd1 closed"); + + close(fd2); +} + +int main(void) { + cleanup(); + + test_dup_shared_offset(); + test_independent_open_no_share(); + test_dup_shared_flags(); + test_cloexec_per_fd(); + test_dup2_shared_offset(); + test_dup_read_advances_shared_offset(); + test_dup2_same_fd(); + test_dup2_high_fd(); + test_dup3_same_fd_einval(); + test_dup_close_original(); + + cleanup(); + + printf("\n========================================\n"); + printf("Total: %d tests, %d passed, %d failed\n", test_count, + test_count - fail_count, fail_count); + printf("========================================\n"); + + return fail_count > 0 ? 1 : 0; +} diff --git a/user/apps/tests/syscall/gvisor/blocklists/lseek_test b/user/apps/tests/syscall/gvisor/blocklists/lseek_test index 88b63563ab..bdca16cb6c 100644 --- a/user/apps/tests/syscall/gvisor/blocklists/lseek_test +++ b/user/apps/tests/syscall/gvisor/blocklists/lseek_test @@ -1,6 +1,2 @@ -# 暂时不支持该测例,因为/proc/stat不支持 -LseekTest.ProcStatTwice -# 暂时不支持该测例,目前dup与linux的行为不一致,目前实现为直接克隆了一个新的文件结构体给新的fd,导致lseek的偏移量不一致 -LseekTest.EtcPasswdDup # 未找到Seek_Hole的支持 LseekTest.SeekDataAndSeekHole