From 007f65fdfa860f75066838440532acdbda8aeefe Mon Sep 17 00:00:00 2001 From: longjin Date: Wed, 11 Feb 2026 15:34:07 +0800 Subject: [PATCH] feat(vfs): implement flock advisory file locking - Add flock syscall support with shared/exclusive locks and nonblocking mode - Introduce open file description ID for flock owner tracking - Add flock manager with sharded hashmap for lock state management - Support lock inheritance across dup/fork and release on last close - Add comprehensive test suite covering basic operations and edge cases - Fix SCM_RIGHTS fd passing to share open file description instead of cloning Signed-off-by: longjin --- kernel/src/filesystem/vfs/file.rs | 16 + kernel/src/filesystem/vfs/flock.rs | 334 ++++++++++++ kernel/src/filesystem/vfs/mod.rs | 3 +- kernel/src/filesystem/vfs/mount.rs | 6 + kernel/src/filesystem/vfs/syscall/mod.rs | 1 + .../src/filesystem/vfs/syscall/sys_flock.rs | 88 ++++ kernel/src/filesystem/vfs/vcore.rs | 1 + kernel/src/net/socket/unix/datagram/mod.rs | 4 +- kernel/src/net/socket/unix/stream/mod.rs | 4 +- user/apps/c_unitest/test_flock.c | 492 ++++++++++++++++++ user/apps/tests/syscall/gvisor/whitelist.txt | 1 + 11 files changed, 945 insertions(+), 5 deletions(-) create mode 100644 kernel/src/filesystem/vfs/flock.rs create mode 100644 kernel/src/filesystem/vfs/syscall/sys_flock.rs create mode 100644 user/apps/c_unitest/test_flock.c diff --git a/kernel/src/filesystem/vfs/file.rs b/kernel/src/filesystem/vfs/file.rs index 1dd009770c..5616f3d117 100644 --- a/kernel/src/filesystem/vfs/file.rs +++ b/kernel/src/filesystem/vfs/file.rs @@ -46,6 +46,12 @@ use crate::{ use crate::filesystem::vfs::InodeMode; const MAX_LFS_FILESIZE: i64 = i64::MAX; +static NEXT_OPEN_FILE_ID: AtomicUsize = AtomicUsize::new(1); + +#[inline] +fn alloc_open_file_id() -> usize { + NEXT_OPEN_FILE_ID.fetch_add(1, Ordering::Relaxed) +} #[derive(Clone, Copy, Debug)] enum OffsetUpdate { @@ -381,6 +387,8 @@ impl FileMode { /// @brief 抽象文件结构体 #[derive(Debug)] pub struct File { + /// 唯一 open file description id,用于 flock owner 标识。 + open_file_id: usize, inode: Arc, /// 对于文件,表示字节偏移量;对于文件夹,表示当前操作的子目录项偏移量 offset: AtomicUsize, @@ -603,6 +611,7 @@ impl File { } let f = File { + open_file_id: alloc_open_file_id(), inode, offset: AtomicUsize::new(0), flags: RwSem::new(flags), @@ -1130,6 +1139,7 @@ impl File { /// @return Option 克隆后的文件结构体。如果克隆失败,返回None pub fn try_clone(&self) -> Option { let res = Self { + open_file_id: alloc_open_file_id(), inode: self.inode.clone(), offset: AtomicUsize::new(self.offset.load(Ordering::SeqCst)), flags: RwSem::new(self.flags()), @@ -1160,6 +1170,11 @@ impl File { return self.file_type; } + #[inline] + pub fn open_file_id(&self) -> usize { + self.open_file_id + } + /// 获取当前文件偏移(等价于用户态的 file position)。 #[inline] pub fn pos(&self) -> usize { @@ -1360,6 +1375,7 @@ impl File { impl Drop for File { fn drop(&mut self) { + super::flock::release_all_for_file(self); let r: Result<(), SystemError> = self.inode.close(self.private_data.lock()); // 打印错误信息 if r.is_err() { diff --git a/kernel/src/filesystem/vfs/flock.rs b/kernel/src/filesystem/vfs/flock.rs new file mode 100644 index 0000000000..49a798404a --- /dev/null +++ b/kernel/src/filesystem/vfs/flock.rs @@ -0,0 +1,334 @@ +use alloc::{sync::Arc, vec::Vec}; + +use hashbrown::{HashMap, HashSet}; +use jhash::jhash2; +use system_error::SystemError; + +use crate::libs::{casting::DowncastArc, lazy_init::Lazy, mutex::Mutex, wait_queue::WaitQueue}; + +use super::{file::File, mount::MountFSInode, IndexNode, InodeId}; + +const FLOCK_SHARDS: usize = 53; +type OwnerId = usize; + +#[derive(Clone, Copy, Eq, PartialEq, Hash)] +struct FlockKey { + dev_id: usize, + inode_id: InodeId, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum FlockOperation { + Shared, + Exclusive, + Unlock, +} + +#[derive(Default)] +struct FlockEntryState { + exclusive_owner: Option, + shared_owners: HashSet, +} + +impl FlockEntryState { + #[inline] + fn owner_lock(&self, owner: OwnerId) -> Option { + if self.exclusive_owner == Some(owner) { + Some(FlockOperation::Exclusive) + } else if self.shared_owners.contains(&owner) { + Some(FlockOperation::Shared) + } else { + None + } + } + + #[inline] + fn remove_owner(&mut self, owner: OwnerId) -> bool { + let mut changed = false; + if self.exclusive_owner == Some(owner) { + self.exclusive_owner = None; + changed = true; + } + if self.shared_owners.remove(&owner) { + changed = true; + } + changed + } + + #[inline] + fn has_conflict(&self, owner: OwnerId, req: FlockOperation) -> bool { + match req { + FlockOperation::Shared => self + .exclusive_owner + .is_some_and(|exclusive_owner| exclusive_owner != owner), + FlockOperation::Exclusive => { + if self + .exclusive_owner + .is_some_and(|exclusive_owner| exclusive_owner != owner) + { + return true; + } + self.shared_owners + .iter() + .any(|shared_owner| *shared_owner != owner) + } + FlockOperation::Unlock => false, + } + } + + #[inline] + fn acquire(&mut self, owner: OwnerId, req: FlockOperation) { + match req { + FlockOperation::Shared => { + debug_assert!(self.exclusive_owner.is_none()); + self.shared_owners.insert(owner); + } + FlockOperation::Exclusive => { + debug_assert!(self.exclusive_owner.is_none()); + debug_assert!(self.shared_owners.is_empty()); + self.exclusive_owner = Some(owner); + } + FlockOperation::Unlock => {} + } + } + + #[inline] + fn is_empty(&self) -> bool { + self.exclusive_owner.is_none() && self.shared_owners.is_empty() + } +} + +struct FlockEntry { + state: Mutex, + waitq: WaitQueue, +} + +impl FlockEntry { + #[inline] + fn new() -> Self { + Self { + state: Mutex::new(FlockEntryState::default()), + waitq: WaitQueue::default(), + } + } + + #[inline] + fn unlock_owner(&self, owner: OwnerId) -> bool { + self.state.lock().remove_owner(owner) + } + + #[inline] + fn is_empty(&self) -> bool { + self.state.lock().is_empty() + } +} + +struct FlockShard { + map: Mutex>>, +} + +pub struct FlockManager { + shards: Vec, +} + +impl FlockManager { + pub fn new() -> Self { + let mut shards = Vec::with_capacity(FLOCK_SHARDS); + for _ in 0..FLOCK_SHARDS { + shards.push(FlockShard { + map: Mutex::new(HashMap::new()), + }); + } + Self { shards } + } + + #[inline] + fn shard_index(key: &FlockKey) -> usize { + let dev_id = key.dev_id as u64; + let inode_id = key.inode_id.data() as u64; + let key_array = [ + (dev_id >> 32) as u32, + dev_id as u32, + (inode_id >> 32) as u32, + inode_id as u32, + ]; + let hash = jhash2(&key_array, 0); + (hash as usize) % FLOCK_SHARDS + } + + #[inline] + fn shard(&self, key: &FlockKey) -> &FlockShard { + &self.shards[Self::shard_index(key)] + } + + fn canonical_inode_for_lock(file: &File) -> Arc { + // 对 flock key 计算,统一剥离 MountFSInode 包装,避免 mount 侧 + // metadata.dev_id 合成策略导致同一底层 inode 被误判为不同锁对象。 + let mut inode = file.inode(); + loop { + match inode.clone().downcast_arc::() { + Some(mnt_inode) => inode = mnt_inode.underlying_inode(), + None => return inode, + } + } + } + + fn key_from_file(file: &File) -> Result { + let inode = Self::canonical_inode_for_lock(file); + let md = inode.metadata()?; + Ok(FlockKey { + dev_id: md.dev_id, + inode_id: md.inode_id, + }) + } + + fn get_or_create_entry(&self, key: FlockKey) -> Arc { + let shard = self.shard(&key); + let mut guard = shard.map.lock(); + guard + .entry(key) + .or_insert_with(|| Arc::new(FlockEntry::new())) + .clone() + } + + fn get_entry(&self, key: &FlockKey) -> Option> { + let shard = self.shard(key); + shard.map.lock().get(key).cloned() + } + + fn lock_or_wait( + entry: &Arc, + owner: OwnerId, + req: FlockOperation, + nonblocking: bool, + ) -> Result<(), SystemError> { + debug_assert!(matches!( + req, + FlockOperation::Shared | FlockOperation::Exclusive + )); + + let mut dropped_old_lock = false; + { + let mut state = entry.state.lock(); + if let Some(current_lock) = state.owner_lock(owner) { + if current_lock == req { + return Ok(()); + } + let _ = state.remove_owner(owner); + dropped_old_lock = true; + } + + if !state.has_conflict(owner, req) { + state.acquire(owner, req); + drop(state); + if dropped_old_lock { + entry.waitq.wakeup_all(None); + } + return Ok(()); + } + } + + if dropped_old_lock { + entry.waitq.wakeup_all(None); + } + + if nonblocking { + return Err(SystemError::EAGAIN_OR_EWOULDBLOCK); + } + + entry.waitq.wait_until_interruptible(|| { + let mut state = entry.state.lock(); + if state.has_conflict(owner, req) { + None + } else { + state.acquire(owner, req); + Some(()) + } + })?; + + Ok(()) + } + + fn try_cleanup_entry(&self, key: &FlockKey, entry: &Arc) { + if !entry.is_empty() || !entry.waitq.is_empty() { + return; + } + + let shard = self.shard(key); + let mut guard = shard.map.lock(); + if let Some(current) = guard.get(key) { + if Arc::ptr_eq(current, entry) + && entry.is_empty() + && entry.waitq.is_empty() + && Arc::strong_count(entry) == 2 + { + guard.remove(key); + } + } + } + + pub fn apply( + &self, + file: &Arc, + operation: FlockOperation, + nonblocking: bool, + ) -> Result<(), SystemError> { + let key = Self::key_from_file(file.as_ref())?; + let owner = file.open_file_id(); + let entry = self.get_or_create_entry(key); + + let result = match operation { + FlockOperation::Unlock => { + if entry.unlock_owner(owner) { + entry.waitq.wakeup_all(None); + } + Ok(()) + } + FlockOperation::Shared | FlockOperation::Exclusive => { + Self::lock_or_wait(&entry, owner, operation, nonblocking) + } + }; + + self.try_cleanup_entry(&key, &entry); + result + } + + pub fn release_file(&self, file: &File) { + let Ok(key) = Self::key_from_file(file) else { + return; + }; + let owner = file.open_file_id(); + let Some(entry) = self.get_entry(&key) else { + return; + }; + + if entry.unlock_owner(owner) { + entry.waitq.wakeup_all(None); + } + self.try_cleanup_entry(&key, &entry); + } +} + +static FLOCK_MANAGER: Lazy = Lazy::new(); + +pub fn init_flock_manager() { + if !FLOCK_MANAGER.initialized() { + FLOCK_MANAGER.init(FlockManager::new()); + } +} + +#[inline] +pub fn apply_flock( + file: &Arc, + operation: FlockOperation, + nonblocking: bool, +) -> Result<(), SystemError> { + FLOCK_MANAGER.get().apply(file, operation, nonblocking) +} + +pub fn release_all_for_file(file: &File) { + if !FLOCK_MANAGER.initialized() { + return; + } + FLOCK_MANAGER.get().release_file(file); +} diff --git a/kernel/src/filesystem/vfs/mod.rs b/kernel/src/filesystem/vfs/mod.rs index c6174b8974..c68440e181 100644 --- a/kernel/src/filesystem/vfs/mod.rs +++ b/kernel/src/filesystem/vfs/mod.rs @@ -2,6 +2,7 @@ pub mod append_lock; pub mod fasync; pub mod fcntl; pub mod file; +pub mod flock; pub mod iov; pub mod mount; pub mod open; @@ -11,8 +12,8 @@ pub mod syscall; pub mod utils; pub mod vcore; -use ::core::{any::Any, fmt::Debug, fmt::Display, sync::atomic::AtomicUsize}; use alloc::{string::String, sync::Arc, vec::Vec}; +use core::{any::Any, fmt::Debug, fmt::Display, sync::atomic::AtomicUsize}; use derive_builder::Builder; use intertrait::CastFromSync; use mount::MountFlags; diff --git a/kernel/src/filesystem/vfs/mount.rs b/kernel/src/filesystem/vfs/mount.rs index 31bd3dae1e..33ed0e44f4 100644 --- a/kernel/src/filesystem/vfs/mount.rs +++ b/kernel/src/filesystem/vfs/mount.rs @@ -428,6 +428,12 @@ impl Drop for MountFS { } impl MountFSInode { + /// 返回被挂载包装器包裹的底层 inode。 + #[inline] + pub(super) fn underlying_inode(&self) -> Arc { + self.inner_inode.clone() + } + /// @brief 用Arc指针包裹MountFSInode对象。 /// 本函数的主要功能为,初始化MountFSInode对象中的自引用Weak指针 /// 本函数只应在构造器中被调用 diff --git a/kernel/src/filesystem/vfs/syscall/mod.rs b/kernel/src/filesystem/vfs/syscall/mod.rs index ac0a00ecc0..b81462eedb 100644 --- a/kernel/src/filesystem/vfs/syscall/mod.rs +++ b/kernel/src/filesystem/vfs/syscall/mod.rs @@ -26,6 +26,7 @@ mod sys_fchmodat; mod sys_fchown; mod sys_fchownat; mod sys_fcntl; +mod sys_flock; mod sys_fstatfs; mod sys_ftruncate; mod sys_getcwd; diff --git a/kernel/src/filesystem/vfs/syscall/sys_flock.rs b/kernel/src/filesystem/vfs/syscall/sys_flock.rs new file mode 100644 index 0000000000..c44cefa101 --- /dev/null +++ b/kernel/src/filesystem/vfs/syscall/sys_flock.rs @@ -0,0 +1,88 @@ +use core::sync::atomic::{AtomicBool, Ordering}; + +use alloc::vec::Vec; +use log::warn; +use system_error::SystemError; + +use crate::{ + arch::{interrupt::TrapFrame, syscall::nr::SYS_FLOCK}, + filesystem::vfs::{ + file::FileMode, + flock::{apply_flock, FlockOperation}, + }, + process::ProcessManager, + syscall::table::{FormattedSyscallParam, Syscall}, +}; + +const LOCK_SH: u32 = 1; +const LOCK_EX: u32 = 2; +const LOCK_NB: u32 = 4; +const LOCK_UN: u32 = 8; +const LOCK_MAND: u32 = 32; + +static WARNED_LOCK_MAND: AtomicBool = AtomicBool::new(false); + +pub struct SysFlockHandle; + +impl Syscall for SysFlockHandle { + fn num_args(&self) -> usize { + 2 + } + + fn handle(&self, args: &[usize], _frame: &mut TrapFrame) -> Result { + let fd = args[0] as i32; + let cmd = args[1] as u32; + + if (cmd & LOCK_MAND) != 0 { + if !WARNED_LOCK_MAND.swap(true, Ordering::Relaxed) { + warn!( + "flock: LOCK_MAND support has been removed; request ignored (Linux compatible)" + ); + } + return Ok(0); + } + + let (operation, nonblocking) = parse_flock_cmd(cmd)?; + + let binding = ProcessManager::current_pcb().fd_table(); + let fd_table_guard = binding.read(); + let file = fd_table_guard + .get_file_by_fd(fd) + .ok_or(SystemError::EBADF)?; + drop(fd_table_guard); + + if operation != FlockOperation::Unlock + && !file + .mode() + .intersects(FileMode::FMODE_READ | FileMode::FMODE_WRITE) + { + return Err(SystemError::EBADF); + } + + apply_flock(&file, operation, nonblocking)?; + Ok(0) + } + + fn entry_format(&self, args: &[usize]) -> Vec { + vec![ + FormattedSyscallParam::new("fd", format!("{:#x}", args[0] as i32)), + FormattedSyscallParam::new("cmd", format!("{:#x}", args[1] as u32)), + ] + } +} + +fn parse_flock_cmd(mut cmd: u32) -> Result<(FlockOperation, bool), SystemError> { + let nonblocking = (cmd & LOCK_NB) != 0; + cmd &= !LOCK_NB; + + let operation = match cmd { + LOCK_SH => FlockOperation::Shared, + LOCK_EX => FlockOperation::Exclusive, + LOCK_UN => FlockOperation::Unlock, + _ => return Err(SystemError::EINVAL), + }; + + Ok((operation, nonblocking)) +} + +syscall_table_macros::declare_syscall!(SYS_FLOCK, SysFlockHandle); diff --git a/kernel/src/filesystem/vfs/vcore.rs b/kernel/src/filesystem/vfs/vcore.rs index 76da2b7227..08aac58402 100644 --- a/kernel/src/filesystem/vfs/vcore.rs +++ b/kernel/src/filesystem/vfs/vcore.rs @@ -55,6 +55,7 @@ pub fn generate_inode_id() -> InodeId { pub fn vfs_init() -> Result<(), SystemError> { // Initialize global append lock manager before any file write path uses it. super::append_lock::init_append_lock_manager(); + super::flock::init_flock_manager(); mnt_namespace_init(); diff --git a/kernel/src/net/socket/unix/datagram/mod.rs b/kernel/src/net/socket/unix/datagram/mod.rs index 28ea427e9d..666299576a 100644 --- a/kernel/src/net/socket/unix/datagram/mod.rs +++ b/kernel/src/net/socket/unix/datagram/mod.rs @@ -1025,8 +1025,8 @@ impl Socket for UnixDatagramSocket { let fd_table_binding = ProcessManager::current_pcb().fd_table(); let mut fd_table = fd_table_binding.write(); for file in rights.iter().take(fit) { - let new_file = file.as_ref().try_clone().ok_or(SystemError::EINVAL)?; - let new_fd = fd_table.alloc_fd(new_file, None, cloexec)?; + // SCM_RIGHTS 复制的是 fd 引用,必须共享同一个 open file description。 + let new_fd = fd_table.alloc_fd_arc(file.clone(), None, cloexec)?; received_fds.push(new_fd); } } diff --git a/kernel/src/net/socket/unix/stream/mod.rs b/kernel/src/net/socket/unix/stream/mod.rs index fa55f25f9a..a9a8e4aa3f 100644 --- a/kernel/src/net/socket/unix/stream/mod.rs +++ b/kernel/src/net/socket/unix/stream/mod.rs @@ -978,8 +978,8 @@ impl Socket for UnixStreamSocket { let fd_table_binding = ProcessManager::current_pcb().fd_table(); let mut fd_table = fd_table_binding.write(); for file in scm_rights.iter().take(fit) { - let new_file = file.as_ref().try_clone().ok_or(SystemError::EINVAL)?; - let new_fd = fd_table.alloc_fd(new_file, None, cloexec)?; + // SCM_RIGHTS 复制的是 fd 引用,必须共享同一个 open file description。 + let new_fd = fd_table.alloc_fd_arc(file.clone(), None, cloexec)?; received_fds.push(new_fd); } } diff --git a/user/apps/c_unitest/test_flock.c b/user/apps/c_unitest/test_flock.c new file mode 100644 index 0000000000..6639ed949a --- /dev/null +++ b/user/apps/c_unitest/test_flock.c @@ -0,0 +1,492 @@ +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int g_total = 0; +static int g_failed = 0; + +#define CHECK(cond, msg) \ + do { \ + g_total++; \ + if (!(cond)) { \ + g_failed++; \ + fprintf(stderr, "FAIL: %s (line %d)\n", msg, __LINE__); \ + } else { \ + printf("PASS: %s\n", msg); \ + } \ + } while (0) + +static int is_wouldblock_errno(int err) { return err == EAGAIN || err == EWOULDBLOCK; } + +static int open_rw_file(const char *path) { + return open(path, O_RDWR | O_CREAT, 0644); +} + +static void test_invalid_commands(const char *path) { + int fd = open_rw_file(path); + CHECK(fd >= 0, "open file for invalid command test"); + if (fd < 0) { + return; + } + + errno = 0; + CHECK(flock(fd, LOCK_EX | LOCK_SH | LOCK_NB) == -1 && errno == EINVAL, + "LOCK_EX|LOCK_SH|LOCK_NB returns EINVAL"); + + errno = 0; + CHECK(flock(fd, LOCK_EX | LOCK_UN | LOCK_NB) == -1 && errno == EINVAL, + "LOCK_EX|LOCK_UN|LOCK_NB returns EINVAL"); + + errno = 0; + CHECK(flock(fd, LOCK_NB) == -1 && errno == EINVAL, + "LOCK_NB without operation returns EINVAL"); + + close(fd); +} + +static void test_basic_lock_unlock(const char *path) { + int fd = open_rw_file(path); + CHECK(fd >= 0, "open file for basic flock"); + if (fd < 0) { + return; + } + + CHECK(flock(fd, LOCK_EX | LOCK_NB) == 0, "LOCK_EX|LOCK_NB succeeds"); + CHECK(flock(fd, LOCK_UN) == 0, "LOCK_UN after exclusive succeeds"); + CHECK(flock(fd, LOCK_SH | LOCK_NB) == 0, "LOCK_SH|LOCK_NB succeeds"); + CHECK(flock(fd, LOCK_UN) == 0, "LOCK_UN after shared succeeds"); + + close(fd); +} + +static void test_nonblocking_conflict(const char *path) { + int fd1 = open_rw_file(path); + int fd2 = open_rw_file(path); + CHECK(fd1 >= 0 && fd2 >= 0, "open two independent fds"); + if (fd1 < 0 || fd2 < 0) { + if (fd1 >= 0) + close(fd1); + if (fd2 >= 0) + close(fd2); + return; + } + + CHECK(flock(fd1, LOCK_EX | LOCK_NB) == 0, "fd1 takes exclusive lock"); + errno = 0; + CHECK(flock(fd2, LOCK_EX | LOCK_NB) == -1 && is_wouldblock_errno(errno), + "fd2 nonblocking exclusive lock conflicts"); + CHECK(flock(fd1, LOCK_UN) == 0, "fd1 unlock succeeds"); + CHECK(flock(fd2, LOCK_EX | LOCK_NB) == 0, "fd2 lock succeeds after fd1 unlock"); + CHECK(flock(fd2, LOCK_UN) == 0, "fd2 unlock succeeds"); + + close(fd2); + close(fd1); +} + +static void test_dup_unlock_release(const char *path) { + int fd = open_rw_file(path); + int dupfd = dup(fd); + int other = open_rw_file(path); + CHECK(fd >= 0 && dupfd >= 0 && other >= 0, "open/dup for dup unlock test"); + if (fd < 0 || dupfd < 0 || other < 0) { + if (fd >= 0) + close(fd); + if (dupfd >= 0) + close(dupfd); + if (other >= 0) + close(other); + return; + } + + CHECK(flock(fd, LOCK_EX | LOCK_NB) == 0, "original fd takes exclusive lock"); + errno = 0; + CHECK(flock(other, LOCK_EX | LOCK_NB) == -1 && is_wouldblock_errno(errno), + "unrelated fd is blocked by dup-shared lock"); + CHECK(flock(dupfd, LOCK_UN) == 0, "LOCK_UN via dup fd releases lock"); + CHECK(flock(other, LOCK_EX | LOCK_NB) == 0, "unrelated fd can lock after dup unlock"); + CHECK(flock(other, LOCK_UN) == 0, "unrelated fd unlock succeeds"); + + close(other); + close(dupfd); + close(fd); +} + +static void test_dup_last_close_release(const char *path) { + int fd = open_rw_file(path); + int dupfd = dup(fd); + int other = open_rw_file(path); + CHECK(fd >= 0 && dupfd >= 0 && other >= 0, "open/dup for last-close release test"); + if (fd < 0 || dupfd < 0 || other < 0) { + if (fd >= 0) + close(fd); + if (dupfd >= 0) + close(dupfd); + if (other >= 0) + close(other); + return; + } + + CHECK(flock(fd, LOCK_EX | LOCK_NB) == 0, "original fd takes exclusive lock"); + + close(dupfd); + errno = 0; + CHECK(flock(other, LOCK_EX | LOCK_NB) == -1 && is_wouldblock_errno(errno), + "closing one dup fd does not release lock"); + + close(fd); + CHECK(flock(other, LOCK_EX | LOCK_NB) == 0, + "last close of open-file-description releases lock"); + CHECK(flock(other, LOCK_UN) == 0, "unlock after last-close release succeeds"); + + close(other); +} + +static void test_fork_unlock_release(const char *path) { + int fd = open_rw_file(path); + int other = open_rw_file(path); + CHECK(fd >= 0 && other >= 0, "open fds for fork flock test"); + if (fd < 0 || other < 0) { + if (fd >= 0) + close(fd); + if (other >= 0) + close(other); + return; + } + + CHECK(flock(fd, LOCK_EX | LOCK_NB) == 0, "parent acquires exclusive lock"); + + pid_t pid = fork(); + CHECK(pid >= 0, "fork for flock test"); + if (pid < 0) { + close(other); + close(fd); + return; + } + + if (pid == 0) { + int rc = 0; + + errno = 0; + if (!(flock(other, LOCK_EX | LOCK_NB) == -1 && is_wouldblock_errno(errno))) { + rc = 1; + } + + if (rc == 0 && flock(fd, LOCK_UN) != 0) { + rc = 1; + } + + if (rc == 0 && flock(other, LOCK_EX | LOCK_NB) != 0) { + rc = 1; + } + if (rc == 0 && flock(other, LOCK_UN) != 0) { + rc = 1; + } + + close(other); + close(fd); + _exit(rc); + } + + int status = 0; + CHECK(waitpid(pid, &status, 0) == pid, "wait child for flock fork test"); + CHECK(WIFEXITED(status) && WEXITSTATUS(status) == 0, + "child unlock on inherited fd releases shared lock"); + + close(other); + close(fd); +} + +static volatile sig_atomic_t g_sigalrm_seen = 0; + +static void sigalrm_handler(int sig) { + (void)sig; + g_sigalrm_seen = 1; +} + +static void test_blocking_interrupted_by_signal(const char *path) { + int fd1 = open_rw_file(path); + int fd2 = open_rw_file(path); + CHECK(fd1 >= 0 && fd2 >= 0, "open two independent fds for EINTR test"); + if (fd1 < 0 || fd2 < 0) { + if (fd1 >= 0) + close(fd1); + if (fd2 >= 0) + close(fd2); + return; + } + + CHECK(flock(fd1, LOCK_EX | LOCK_NB) == 0, "fd1 takes lock before blocking flock"); + + struct sigaction old_act; + struct sigaction act; + memset(&act, 0, sizeof(act)); + act.sa_handler = sigalrm_handler; + sigemptyset(&act.sa_mask); + act.sa_flags = 0; + CHECK(sigaction(SIGALRM, &act, &old_act) == 0, "install SIGALRM handler"); + + g_sigalrm_seen = 0; + alarm(1); + errno = 0; + CHECK(flock(fd2, LOCK_EX) == -1 && errno == EINTR, + "blocking flock interrupted by signal returns EINTR"); + alarm(0); + CHECK(g_sigalrm_seen != 0, "SIGALRM handler executed"); + + CHECK(sigaction(SIGALRM, &old_act, NULL) == 0, "restore SIGALRM handler"); + CHECK(flock(fd1, LOCK_UN) == 0, "fd1 unlock after EINTR test"); + + close(fd2); + close(fd1); +} + +static void test_opath_ebadf(const char *path) { +#ifdef O_PATH + int fd = open(path, O_RDONLY | O_PATH, 0); + CHECK(fd >= 0, "open O_PATH file"); + if (fd < 0) { + return; + } + + errno = 0; + CHECK(flock(fd, LOCK_EX | LOCK_NB) == -1 && errno == EBADF, + "flock on O_PATH fd returns EBADF"); + close(fd); +#else + (void)path; + printf("SKIP: O_PATH is unavailable in headers\n"); +#endif +} + +static void test_pipe_flock(void) { + int p[2]; + int rc = pipe(p); + CHECK(rc == 0, "create pipe for flock test"); + if (rc != 0) { + return; + } + + CHECK(flock(p[0], LOCK_EX | LOCK_NB) == 0, "pipe read end lock succeeds"); + errno = 0; + CHECK(flock(p[1], LOCK_EX | LOCK_NB) == -1 && is_wouldblock_errno(errno), + "pipe write end lock conflicts"); + CHECK(flock(p[0], LOCK_UN) == 0, "pipe read end unlock succeeds"); + CHECK(flock(p[1], LOCK_EX | LOCK_NB) == 0, "pipe write end lock succeeds after unlock"); + CHECK(flock(p[1], LOCK_UN) == 0, "pipe write end unlock succeeds"); + + close(p[0]); + close(p[1]); +} + +static void test_socket_flock(void) { + int sock = socket(AF_UNIX, SOCK_STREAM, 0); + CHECK(sock >= 0, "create UNIX socket for flock test"); + if (sock < 0) { + return; + } + + CHECK(flock(sock, LOCK_EX | LOCK_NB) == 0, "flock on socket succeeds"); + CHECK(flock(sock, LOCK_UN) == 0, "unlock socket flock succeeds"); + + close(sock); +} + +static void test_blocking_downgrade_wakeup(const char *path) { + int fd = open_rw_file(path); + CHECK(fd >= 0, "open file for downgrade wakeup test"); + if (fd < 0) + return; + + CHECK(flock(fd, LOCK_EX | LOCK_NB) == 0, "parent acquires LOCK_EX"); + + int pipefd[2]; + CHECK(pipe(pipefd) == 0, "create pipe for downgrade wakeup test"); + + pid_t pid = fork(); + CHECK(pid >= 0, "fork for downgrade wakeup test"); + if (pid < 0) { + close(fd); + close(pipefd[0]); + close(pipefd[1]); + return; + } + + if (pid == 0) { + close(pipefd[0]); + int child_fd = open_rw_file(path); + if (child_fd < 0) + _exit(1); + + /* This should block until the parent downgrades to LOCK_SH */ + if (flock(child_fd, LOCK_SH) != 0) + _exit(2); + + /* Notify parent that we acquired the lock */ + char ok = 1; + write(pipefd[1], &ok, 1); + + flock(child_fd, LOCK_UN); + close(child_fd); + close(pipefd[1]); + _exit(0); + } + + /* Parent: give child time to enter blocking flock */ + close(pipefd[1]); + usleep(200000); + + /* Downgrade from LOCK_EX to LOCK_SH — should wake the child */ + CHECK(flock(fd, LOCK_SH | LOCK_NB) == 0, "parent downgrades to LOCK_SH"); + + /* Wait for child to signal success, with a timeout via alarm */ + struct sigaction old_act; + struct sigaction act; + memset(&act, 0, sizeof(act)); + act.sa_handler = sigalrm_handler; + sigemptyset(&act.sa_mask); + act.sa_flags = 0; + sigaction(SIGALRM, &act, &old_act); + + g_sigalrm_seen = 0; + alarm(5); + + char buf = 0; + int r = read(pipefd[0], &buf, 1); + alarm(0); + sigaction(SIGALRM, &old_act, NULL); + + CHECK(r == 1 && buf == 1, "child acquired LOCK_SH after parent downgrade"); + + int status = 0; + waitpid(pid, &status, 0); + CHECK(WIFEXITED(status) && WEXITSTATUS(status) == 0, + "child exited successfully in downgrade wakeup test"); + + flock(fd, LOCK_UN); + close(fd); + close(pipefd[0]); +} + +static void test_blocking_upgrade_wakeup(const char *path) { + int fd1 = open_rw_file(path); + int fd2 = open_rw_file(path); + CHECK(fd1 >= 0 && fd2 >= 0, "open fds for upgrade wakeup test"); + if (fd1 < 0 || fd2 < 0) { + if (fd1 >= 0) + close(fd1); + if (fd2 >= 0) + close(fd2); + return; + } + + CHECK(flock(fd1, LOCK_SH | LOCK_NB) == 0, "fd1 acquires LOCK_SH"); + CHECK(flock(fd2, LOCK_SH | LOCK_NB) == 0, "fd2 acquires LOCK_SH"); + + int pipefd[2]; + CHECK(pipe(pipefd) == 0, "create pipe for upgrade wakeup test"); + + pid_t pid = fork(); + CHECK(pid >= 0, "fork for upgrade wakeup test"); + if (pid < 0) { + close(fd1); + close(fd2); + close(pipefd[0]); + close(pipefd[1]); + return; + } + + if (pid == 0) { + close(pipefd[0]); + /* Child inherited both shared locks via fd1 and fd2. + * Release fd1's shared lock so only fd2 remains from child side. */ + flock(fd1, LOCK_UN); + close(fd1); + + /* Try to upgrade fd2 to LOCK_EX — should block because parent still holds fd1 LOCK_SH */ + if (flock(fd2, LOCK_EX) != 0) + _exit(2); + + /* Notify parent that we acquired the lock */ + char ok = 1; + write(pipefd[1], &ok, 1); + + flock(fd2, LOCK_UN); + close(fd2); + close(pipefd[1]); + _exit(0); + } + + /* Parent: give child time to enter blocking flock */ + close(pipefd[1]); + usleep(200000); + + /* Release parent's LOCK_SH on fd1 — should unblock the child's LOCK_EX on fd2 */ + CHECK(flock(fd1, LOCK_UN) == 0, "parent releases LOCK_SH on fd1"); + + /* Wait for child to signal success, with a timeout via alarm */ + struct sigaction old_act; + struct sigaction act; + memset(&act, 0, sizeof(act)); + act.sa_handler = sigalrm_handler; + sigemptyset(&act.sa_mask); + act.sa_flags = 0; + sigaction(SIGALRM, &act, &old_act); + + g_sigalrm_seen = 0; + alarm(5); + + char buf = 0; + int r = read(pipefd[0], &buf, 1); + alarm(0); + sigaction(SIGALRM, &old_act, NULL); + + CHECK(r == 1 && buf == 1, "child acquired LOCK_EX after parent released LOCK_SH"); + + int status = 0; + waitpid(pid, &status, 0); + CHECK(WIFEXITED(status) && WEXITSTATUS(status) == 0, + "child exited successfully in upgrade wakeup test"); + + flock(fd2, LOCK_UN); + close(fd1); + close(fd2); + close(pipefd[0]); +} + +int main(void) { + char path[128]; + snprintf(path, sizeof(path), "/tmp/test_flock_%d.tmp", getpid()); + + int initfd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0644); + CHECK(initfd >= 0, "create flock test file"); + if (initfd >= 0) { + close(initfd); + } + + test_invalid_commands(path); + test_basic_lock_unlock(path); + test_nonblocking_conflict(path); + test_dup_unlock_release(path); + test_dup_last_close_release(path); + test_fork_unlock_release(path); + test_blocking_interrupted_by_signal(path); + test_opath_ebadf(path); + test_pipe_flock(); + test_socket_flock(); + test_blocking_downgrade_wakeup(path); + test_blocking_upgrade_wakeup(path); + + unlink(path); + + printf("test_flock summary: total=%d failed=%d\n", g_total, g_failed); + return g_failed == 0 ? 0 : 1; +} diff --git a/user/apps/tests/syscall/gvisor/whitelist.txt b/user/apps/tests/syscall/gvisor/whitelist.txt index 595b65d788..5ab44904b3 100644 --- a/user/apps/tests/syscall/gvisor/whitelist.txt +++ b/user/apps/tests/syscall/gvisor/whitelist.txt @@ -39,6 +39,7 @@ pwritev2_test utimes_test truncate_test fadvise_test +flock_test open_test open_create_test select_test