From 007f65fdfa860f75066838440532acdbda8aeefe Mon Sep 17 00:00:00 2001
From: longjin <longjin@DragonOS.org>
Date: Wed, 11 Feb 2026 15:34:07 +0800
Subject: [PATCH] feat(vfs): implement flock advisory file locking

- Add flock syscall support with shared/exclusive locks and nonblocking mode
- Introduce open file description ID for flock owner tracking
- Add flock manager with sharded hashmap for lock state management
- Support lock inheritance across dup/fork and release on last close
- Add comprehensive test suite covering basic operations and edge cases
- Fix SCM_RIGHTS fd passing to share open file description instead of cloning

Signed-off-by: longjin <longjin@DragonOS.org>
---
 kernel/src/filesystem/vfs/file.rs             |  16 +
 kernel/src/filesystem/vfs/flock.rs            | 334 ++++++++++++
 kernel/src/filesystem/vfs/mod.rs              |   3 +-
 kernel/src/filesystem/vfs/mount.rs            |   6 +
 kernel/src/filesystem/vfs/syscall/mod.rs      |   1 +
 .../src/filesystem/vfs/syscall/sys_flock.rs   |  88 ++++
 kernel/src/filesystem/vfs/vcore.rs            |   1 +
 kernel/src/net/socket/unix/datagram/mod.rs    |   4 +-
 kernel/src/net/socket/unix/stream/mod.rs      |   4 +-
 user/apps/c_unitest/test_flock.c              | 492 ++++++++++++++++++
 user/apps/tests/syscall/gvisor/whitelist.txt  |   1 +
 11 files changed, 945 insertions(+), 5 deletions(-)
 create mode 100644 kernel/src/filesystem/vfs/flock.rs
 create mode 100644 kernel/src/filesystem/vfs/syscall/sys_flock.rs
 create mode 100644 user/apps/c_unitest/test_flock.c
diff --git a/kernel/src/filesystem/vfs/file.rs b/kernel/src/filesystem/vfs/file.rs
index 1dd009770c..5616f3d117 100644
--- a/kernel/src/filesystem/vfs/file.rs
+++ b/kernel/src/filesystem/vfs/file.rs
@@ -46,6 +46,12 @@ use crate::{
 use crate::filesystem::vfs::InodeMode;
 
 const MAX_LFS_FILESIZE: i64 = i64::MAX;
+static NEXT_OPEN_FILE_ID: AtomicUsize = AtomicUsize::new(1);
+
+#[inline]
+fn alloc_open_file_id() -> usize {
+    NEXT_OPEN_FILE_ID.fetch_add(1, Ordering::Relaxed)
+}
 
 #[derive(Clone, Copy, Debug)]
 enum OffsetUpdate {
@@ -381,6 +387,8 @@ impl FileMode {
 /// @brief 抽象文件结构体
 #[derive(Debug)]
 pub struct File {
+    /// 唯一 open file description id，用于 flock owner 标识。
+    open_file_id: usize,
     inode: Arc<dyn IndexNode>,
     /// 对于文件，表示字节偏移量；对于文件夹，表示当前操作的子目录项偏移量
     offset: AtomicUsize,
@@ -603,6 +611,7 @@ impl File {
         }
 
         let f = File {
+            open_file_id: alloc_open_file_id(),
             inode,
             offset: AtomicUsize::new(0),
             flags: RwSem::new(flags),
@@ -1130,6 +1139,7 @@ impl File {
     /// @return Option<File> 克隆后的文件结构体。如果克隆失败，返回None
     pub fn try_clone(&self) -> Option<File> {
         let res = Self {
+            open_file_id: alloc_open_file_id(),
             inode: self.inode.clone(),
             offset: AtomicUsize::new(self.offset.load(Ordering::SeqCst)),
             flags: RwSem::new(self.flags()),
@@ -1160,6 +1170,11 @@ impl File {
         return self.file_type;
     }
 
+    #[inline]
+    pub fn open_file_id(&self) -> usize {
+        self.open_file_id
+    }
+
     /// 获取当前文件偏移（等价于用户态的 file position）。
     #[inline]
     pub fn pos(&self) -> usize {
@@ -1360,6 +1375,7 @@ impl File {
 
 impl Drop for File {
     fn drop(&mut self) {
+        super::flock::release_all_for_file(self);
         let r: Result<(), SystemError> = self.inode.close(self.private_data.lock());
         // 打印错误信息
         if r.is_err() {
diff --git a/kernel/src/filesystem/vfs/flock.rs b/kernel/src/filesystem/vfs/flock.rs
new file mode 100644
index 0000000000..49a798404a
--- /dev/null
+++ b/kernel/src/filesystem/vfs/flock.rs
@@ -0,0 +1,334 @@
+use alloc::{sync::Arc, vec::Vec};
+
+use hashbrown::{HashMap, HashSet};
+use jhash::jhash2;
+use system_error::SystemError;
+
+use crate::libs::{casting::DowncastArc, lazy_init::Lazy, mutex::Mutex, wait_queue::WaitQueue};
+
+use super::{file::File, mount::MountFSInode, IndexNode, InodeId};
+
+const FLOCK_SHARDS: usize = 53;
+type OwnerId = usize;
+
+#[derive(Clone, Copy, Eq, PartialEq, Hash)]
+struct FlockKey {
+    dev_id: usize,
+    inode_id: InodeId,
+}
+
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum FlockOperation {
+    Shared,
+    Exclusive,
+    Unlock,
+}
+
+#[derive(Default)]
+struct FlockEntryState {
+    exclusive_owner: Option<OwnerId>,
+    shared_owners: HashSet<OwnerId>,
+}
+
+impl FlockEntryState {
+    /// Returns the kind of lock `owner` currently holds on this entry, if any.
+    #[inline]
+    fn owner_lock(&self, owner: OwnerId) -> Option<FlockOperation> {
+        if self.exclusive_owner == Some(owner) {
+            Some(FlockOperation::Exclusive)
+        } else if self.shared_owners.contains(&owner) {
+            Some(FlockOperation::Shared)
+        } else {
+            None
+        }
+    }
+
+    /// Drops every lock held by `owner` and reports whether anything was actually released.
+    #[inline]
+    fn remove_owner(&mut self, owner: OwnerId) -> bool {
+        let was_exclusive = self.exclusive_owner == Some(owner);
+        if was_exclusive {
+            self.exclusive_owner = None;
+        }
+        // Always attempt the set removal so a stale shared record cannot survive.
+        let was_shared = self.shared_owners.remove(&owner);
+        was_exclusive || was_shared
+    }
+
+    /// Returns whether a lock held by a *different* owner prevents `owner` from taking `req`.
+    ///
+    /// Locks held by `owner` itself never count as conflicts.
+    #[inline]
+    fn has_conflict(&self, owner: OwnerId, req: FlockOperation) -> bool {
+        let exclusive_by_other = self.exclusive_owner.is_some_and(|holder| holder != owner);
+        match req {
+            FlockOperation::Shared => exclusive_by_other,
+            FlockOperation::Exclusive => {
+                exclusive_by_other || self.shared_owners.iter().any(|holder| *holder != owner)
+            }
+            FlockOperation::Unlock => false,
+        }
+    }
+
+    /// Grants `req` to `owner`, replacing any lock that owner already holds.
+    ///
+    /// Callers must have ruled out conflicts with other owners. The owner itself may still
+    /// hold a lock: an open file description can be shared (dup/fork), so a sharer may have
+    /// locked it while this request was blocked. Replacing keeps at most one lock per owner.
+    #[inline]
+    fn acquire(&mut self, owner: OwnerId, req: FlockOperation) {
+        // An `Unlock` request grants nothing and must leave existing locks untouched.
+        if req != FlockOperation::Unlock {
+            debug_assert!(!self.has_conflict(owner, req));
+            let _ = self.remove_owner(owner);
+        }
+        match req {
+            FlockOperation::Shared => {
+                self.shared_owners.insert(owner);
+            }
+            FlockOperation::Exclusive => self.exclusive_owner = Some(owner),
+            FlockOperation::Unlock => {}
+        }
+    }
+
+    /// Returns whether no owner holds any lock on this entry.
+    #[inline]
+    fn is_empty(&self) -> bool {
+        self.exclusive_owner.is_none() && self.shared_owners.is_empty()
+    }
+}
+
+struct FlockEntry {
+    state: Mutex<FlockEntryState>,
+    waitq: WaitQueue,
+}
+
+impl FlockEntry {
+    #[inline]
+    fn new() -> Self {
+        Self {
+            state: Mutex::new(FlockEntryState::default()),
+            waitq: WaitQueue::default(),
+        }
+    }
+
+    #[inline]
+    fn unlock_owner(&self, owner: OwnerId) -> bool {
+        self.state.lock().remove_owner(owner)
+    }
+
+    #[inline]
+    fn is_empty(&self) -> bool {
+        self.state.lock().is_empty()
+    }
+}
+
+struct FlockShard {
+    map: Mutex<HashMap<FlockKey, Arc<FlockEntry>>>,
+}
+
+pub struct FlockManager {
+    shards: Vec<FlockShard>,
+}
+
+impl FlockManager {
+    /// Builds a manager that owns `FLOCK_SHARDS` shards, each starting with an empty map.
+    pub fn new() -> Self {
+        let shards: Vec<FlockShard> = (0..FLOCK_SHARDS)
+            .map(|_| FlockShard {
+                map: Mutex::new(HashMap::new()),
+            })
+            .collect();
+        Self { shards }
+    }
+
+    #[inline]
+    fn shard_index(key: &FlockKey) -> usize {
+        let dev_id = key.dev_id as u64;
+        let inode_id = key.inode_id.data() as u64;
+        let key_array = [
+            (dev_id >> 32) as u32,
+            dev_id as u32,
+            (inode_id >> 32) as u32,
+            inode_id as u32,
+        ];
+        let hash = jhash2(&key_array, 0);
+        (hash as usize) % FLOCK_SHARDS
+    }
+
+    #[inline]
+    fn shard(&self, key: &FlockKey) -> &FlockShard {
+        &self.shards[Self::shard_index(key)]
+    }
+
+    fn canonical_inode_for_lock(file: &File) -> Arc<dyn IndexNode> {
+        // Strip every MountFSInode wrapper before computing the flock key, so the mount-side
+        // metadata.dev_id synthesis cannot split one underlying inode into several lock objects.
+        let mut inode = file.inode();
+        loop {
+            match inode.clone().downcast_arc::<MountFSInode>() {
+                Some(mnt_inode) => inode = mnt_inode.underlying_inode(),
+                None => return inode,
+            }
+        }
+    }
+
+    fn key_from_file(file: &File) -> Result<FlockKey, SystemError> {
+        let inode = Self::canonical_inode_for_lock(file);
+        let md = inode.metadata()?;
+        Ok(FlockKey {
+            dev_id: md.dev_id,
+            inode_id: md.inode_id,
+        })
+    }
+
+    fn get_or_create_entry(&self, key: FlockKey) -> Arc<FlockEntry> {
+        let shard = self.shard(&key);
+        let mut guard = shard.map.lock();
+        guard
+            .entry(key)
+            .or_insert_with(|| Arc::new(FlockEntry::new()))
+            .clone()
+    }
+
+    fn get_entry(&self, key: &FlockKey) -> Option<Arc<FlockEntry>> {
+        let shard = self.shard(key);
+        shard.map.lock().get(key).cloned()
+    }
+
+    fn lock_or_wait(
+        entry: &Arc<FlockEntry>,
+        owner: OwnerId,
+        req: FlockOperation,
+        nonblocking: bool,
+    ) -> Result<(), SystemError> {
+        debug_assert!(matches!(
+            req,
+            FlockOperation::Shared | FlockOperation::Exclusive
+        ));
+        // Set once this owner's lock of the other kind is dropped; waiters must then be woken.
+        let mut dropped_old_lock = false;
+        {
+            let mut state = entry.state.lock();
+            if let Some(current_lock) = state.owner_lock(owner) {
+                if current_lock == req {
+                    return Ok(());
+                }
+                let _ = state.remove_owner(owner);
+                dropped_old_lock = true;
+            }
+            // No other owner stands in the way: take the lock without sleeping.
+            if !state.has_conflict(owner, req) {
+                state.acquire(owner, req);
+                drop(state);
+                if dropped_old_lock {
+                    entry.waitq.wakeup_all(None);
+                }
+                return Ok(());
+            }
+        }
+        // The request conflicts, but a dropped old lock may already let other waiters proceed.
+        if dropped_old_lock {
+            entry.waitq.wakeup_all(None);
+        }
+
+        if nonblocking {
+            return Err(SystemError::EAGAIN_OR_EWOULDBLOCK);
+        }
+        // Sleep on the entry's wait queue; the closure takes the lock once no conflict remains.
+        entry.waitq.wait_until_interruptible(|| {
+            let mut state = entry.state.lock();
+            if state.has_conflict(owner, req) {
+                None
+            } else {
+                state.acquire(owner, req);
+                Some(())
+            }
+        })?;
+
+        Ok(())
+    }
+
+    fn try_cleanup_entry(&self, key: &FlockKey, entry: &Arc<FlockEntry>) {
+        if !entry.is_empty() || !entry.waitq.is_empty() {
+            return;
+        }
+
+        let shard = self.shard(key);
+        let mut guard = shard.map.lock();
+        if let Some(current) = guard.get(key) {
+            if Arc::ptr_eq(current, entry)
+                && entry.is_empty()
+                && entry.waitq.is_empty()
+                && Arc::strong_count(entry) == 2
+            {
+                guard.remove(key);
+            }
+        }
+    }
+
+    pub fn apply(
+        &self,
+        file: &Arc<File>,
+        operation: FlockOperation,
+        nonblocking: bool,
+    ) -> Result<(), SystemError> {
+        let key = Self::key_from_file(file.as_ref())?;
+        let owner = file.open_file_id();
+        let entry = self.get_or_create_entry(key);
+
+        let result = match operation {
+            FlockOperation::Unlock => {
+                if entry.unlock_owner(owner) {
+                    entry.waitq.wakeup_all(None);
+                }
+                Ok(())
+            }
+            FlockOperation::Shared | FlockOperation::Exclusive => {
+                Self::lock_or_wait(&entry, owner, operation, nonblocking)
+            }
+        };
+
+        self.try_cleanup_entry(&key, &entry);
+        result
+    }
+
+    pub fn release_file(&self, file: &File) {
+        let Ok(key) = Self::key_from_file(file) else {
+            return;
+        };
+        let owner = file.open_file_id();
+        let Some(entry) = self.get_entry(&key) else {
+            return;
+        };
+
+        if entry.unlock_owner(owner) {
+            entry.waitq.wakeup_all(None);
+        }
+        self.try_cleanup_entry(&key, &entry);
+    }
+}
+
+static FLOCK_MANAGER: Lazy<FlockManager> = Lazy::new();
+
+pub fn init_flock_manager() {
+    if !FLOCK_MANAGER.initialized() {
+        FLOCK_MANAGER.init(FlockManager::new());
+    }
+}
+
+#[inline]
+pub fn apply_flock(
+    file: &Arc<File>,
+    operation: FlockOperation,
+    nonblocking: bool,
+) -> Result<(), SystemError> {
+    FLOCK_MANAGER.get().apply(file, operation, nonblocking)
+}
+
+pub fn release_all_for_file(file: &File) {
+    if !FLOCK_MANAGER.initialized() {
+        return;
+    }
+    FLOCK_MANAGER.get().release_file(file);
+}
diff --git a/kernel/src/filesystem/vfs/mod.rs b/kernel/src/filesystem/vfs/mod.rs
index c6174b8974..c68440e181 100644
--- a/kernel/src/filesystem/vfs/mod.rs
+++ b/kernel/src/filesystem/vfs/mod.rs
@@ -2,6 +2,7 @@ pub mod append_lock;
 pub mod fasync;
 pub mod fcntl;
 pub mod file;
+pub mod flock;
 pub mod iov;
 pub mod mount;
 pub mod open;
@@ -11,8 +12,8 @@ pub mod syscall;
 pub mod utils;
 pub mod vcore;
 
-use ::core::{any::Any, fmt::Debug, fmt::Display, sync::atomic::AtomicUsize};
 use alloc::{string::String, sync::Arc, vec::Vec};
+use core::{any::Any, fmt::Debug, fmt::Display, sync::atomic::AtomicUsize};
 use derive_builder::Builder;
 use intertrait::CastFromSync;
 use mount::MountFlags;
diff --git a/kernel/src/filesystem/vfs/mount.rs b/kernel/src/filesystem/vfs/mount.rs
index 31bd3dae1e..33ed0e44f4 100644
--- a/kernel/src/filesystem/vfs/mount.rs
+++ b/kernel/src/filesystem/vfs/mount.rs
@@ -428,6 +428,12 @@ impl Drop for MountFS {
 }
 
 impl MountFSInode {
+    /// Returns a clone of the `Arc` to the inner inode wrapped by this mount inode.
+    #[inline]
+    pub(super) fn underlying_inode(&self) -> Arc<dyn IndexNode> {
+        self.inner_inode.clone()
+    }
+
     /// @brief 用Arc指针包裹MountFSInode对象。
     /// 本函数的主要功能为，初始化MountFSInode对象中的自引用Weak指针
     /// 本函数只应在构造器中被调用
diff --git a/kernel/src/filesystem/vfs/syscall/mod.rs b/kernel/src/filesystem/vfs/syscall/mod.rs
index ac0a00ecc0..b81462eedb 100644
--- a/kernel/src/filesystem/vfs/syscall/mod.rs
+++ b/kernel/src/filesystem/vfs/syscall/mod.rs
@@ -26,6 +26,7 @@ mod sys_fchmodat;
 mod sys_fchown;
 mod sys_fchownat;
 mod sys_fcntl;
+mod sys_flock;
 mod sys_fstatfs;
 mod sys_ftruncate;
 mod sys_getcwd;
diff --git a/kernel/src/filesystem/vfs/syscall/sys_flock.rs b/kernel/src/filesystem/vfs/syscall/sys_flock.rs
new file mode 100644
index 0000000000..c44cefa101
--- /dev/null
+++ b/kernel/src/filesystem/vfs/syscall/sys_flock.rs
@@ -0,0 +1,88 @@
+use core::sync::atomic::{AtomicBool, Ordering};
+
+use alloc::vec::Vec;
+use log::warn;
+use system_error::SystemError;
+
+use crate::{
+    arch::{interrupt::TrapFrame, syscall::nr::SYS_FLOCK},
+    filesystem::vfs::{
+        file::FileMode,
+        flock::{apply_flock, FlockOperation},
+    },
+    process::ProcessManager,
+    syscall::table::{FormattedSyscallParam, Syscall},
+};
+
+const LOCK_SH: u32 = 1;
+const LOCK_EX: u32 = 2;
+const LOCK_NB: u32 = 4;
+const LOCK_UN: u32 = 8;
+const LOCK_MAND: u32 = 32;
+
+static WARNED_LOCK_MAND: AtomicBool = AtomicBool::new(false);
+
+pub struct SysFlockHandle;
+
+impl Syscall for SysFlockHandle {
+    fn num_args(&self) -> usize {
+        2
+    }
+
+    fn handle(&self, args: &[usize], _frame: &mut TrapFrame) -> Result<usize, SystemError> {
+        let fd = args[0] as i32;
+        let cmd = args[1] as u32;
+
+        if (cmd & LOCK_MAND) != 0 {
+            if !WARNED_LOCK_MAND.swap(true, Ordering::Relaxed) {
+                warn!(
+                    "flock: LOCK_MAND support has been removed; request ignored (Linux compatible)"
+                );
+            }
+            return Ok(0);
+        }
+
+        let (operation, nonblocking) = parse_flock_cmd(cmd)?;
+
+        let binding = ProcessManager::current_pcb().fd_table();
+        let fd_table_guard = binding.read();
+        let file = fd_table_guard
+            .get_file_by_fd(fd)
+            .ok_or(SystemError::EBADF)?;
+        drop(fd_table_guard);
+
+        if operation != FlockOperation::Unlock
+            && !file
+                .mode()
+                .intersects(FileMode::FMODE_READ | FileMode::FMODE_WRITE)
+        {
+            return Err(SystemError::EBADF);
+        }
+
+        apply_flock(&file, operation, nonblocking)?;
+        Ok(0)
+    }
+
+    fn entry_format(&self, args: &[usize]) -> Vec<FormattedSyscallParam> {
+        vec![
+            FormattedSyscallParam::new("fd", format!("{:#x}", args[0] as i32)),
+            FormattedSyscallParam::new("cmd", format!("{:#x}", args[1] as u32)),
+        ]
+    }
+}
+
+/// Decodes a raw `flock` command word.
+///
+/// Returns the requested operation together with whether `LOCK_NB` was set, or `EINVAL`
+/// when the remaining bits are not exactly one of `LOCK_SH`, `LOCK_EX` or `LOCK_UN`.
+fn parse_flock_cmd(cmd: u32) -> Result<(FlockOperation, bool), SystemError> {
+    let wants_nonblocking = (cmd & LOCK_NB) != 0;
+    match cmd & !LOCK_NB {
+        LOCK_SH => Ok((FlockOperation::Shared, wants_nonblocking)),
+        LOCK_EX => Ok((FlockOperation::Exclusive, wants_nonblocking)),
+        LOCK_UN => Ok((FlockOperation::Unlock, wants_nonblocking)),
+        _ => Err(SystemError::EINVAL),
+    }
+}
+
+syscall_table_macros::declare_syscall!(SYS_FLOCK, SysFlockHandle);
diff --git a/kernel/src/filesystem/vfs/vcore.rs b/kernel/src/filesystem/vfs/vcore.rs
index 76da2b7227..08aac58402 100644
--- a/kernel/src/filesystem/vfs/vcore.rs
+++ b/kernel/src/filesystem/vfs/vcore.rs
@@ -55,6 +55,7 @@ pub fn generate_inode_id() -> InodeId {
 pub fn vfs_init() -> Result<(), SystemError> {
     // Initialize global append lock manager before any file write path uses it.
     super::append_lock::init_append_lock_manager();
+    super::flock::init_flock_manager();
 
     mnt_namespace_init();
 
diff --git a/kernel/src/net/socket/unix/datagram/mod.rs b/kernel/src/net/socket/unix/datagram/mod.rs
index 28ea427e9d..666299576a 100644
--- a/kernel/src/net/socket/unix/datagram/mod.rs
+++ b/kernel/src/net/socket/unix/datagram/mod.rs
@@ -1025,8 +1025,8 @@ impl Socket for UnixDatagramSocket {
                 let fd_table_binding = ProcessManager::current_pcb().fd_table();
                 let mut fd_table = fd_table_binding.write();
                 for file in rights.iter().take(fit) {
-                    let new_file = file.as_ref().try_clone().ok_or(SystemError::EINVAL)?;
-                    let new_fd = fd_table.alloc_fd(new_file, None, cloexec)?;
+                    // SCM_RIGHTS 复制的是 fd 引用，必须共享同一个 open file description。
+                    let new_fd = fd_table.alloc_fd_arc(file.clone(), None, cloexec)?;
                     received_fds.push(new_fd);
                 }
             }
diff --git a/kernel/src/net/socket/unix/stream/mod.rs b/kernel/src/net/socket/unix/stream/mod.rs
index fa55f25f9a..a9a8e4aa3f 100644
--- a/kernel/src/net/socket/unix/stream/mod.rs
+++ b/kernel/src/net/socket/unix/stream/mod.rs
@@ -978,8 +978,8 @@ impl Socket for UnixStreamSocket {
                 let fd_table_binding = ProcessManager::current_pcb().fd_table();
                 let mut fd_table = fd_table_binding.write();
                 for file in scm_rights.iter().take(fit) {
-                    let new_file = file.as_ref().try_clone().ok_or(SystemError::EINVAL)?;
-                    let new_fd = fd_table.alloc_fd(new_file, None, cloexec)?;
+                    // SCM_RIGHTS 复制的是 fd 引用，必须共享同一个 open file description。
+                    let new_fd = fd_table.alloc_fd_arc(file.clone(), None, cloexec)?;
                     received_fds.push(new_fd);
                 }
             }
diff --git a/user/apps/c_unitest/test_flock.c b/user/apps/c_unitest/test_flock.c
new file mode 100644
index 0000000000..6639ed949a
--- /dev/null
+++ b/user/apps/c_unitest/test_flock.c
@@ -0,0 +1,492 @@
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/file.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+static int g_total = 0;
+static int g_failed = 0;
+
+#define CHECK(cond, msg)                                                       \
+    do {                                                                       \
+        g_total++;                                                             \
+        if (!(cond)) {                                                         \
+            g_failed++;                                                        \
+            fprintf(stderr, "FAIL: %s (line %d)\n", msg, __LINE__);            \
+        } else {                                                               \
+            printf("PASS: %s\n", msg);                                         \
+        }                                                                      \
+    } while (0)
+
+static int is_wouldblock_errno(int err) { return err == EAGAIN || err == EWOULDBLOCK; }
+
+static int open_rw_file(const char *path) {
+    return open(path, O_RDWR | O_CREAT, 0644);
+}
+
+static void test_invalid_commands(const char *path) {
+    int fd = open_rw_file(path);
+    CHECK(fd >= 0, "open file for invalid command test");
+    if (fd < 0) {
+        return;
+    }
+
+    errno = 0;
+    CHECK(flock(fd, LOCK_EX | LOCK_SH | LOCK_NB) == -1 && errno == EINVAL,
+          "LOCK_EX|LOCK_SH|LOCK_NB returns EINVAL");
+
+    errno = 0;
+    CHECK(flock(fd, LOCK_EX | LOCK_UN | LOCK_NB) == -1 && errno == EINVAL,
+          "LOCK_EX|LOCK_UN|LOCK_NB returns EINVAL");
+
+    errno = 0;
+    CHECK(flock(fd, LOCK_NB) == -1 && errno == EINVAL,
+          "LOCK_NB without operation returns EINVAL");
+
+    close(fd);
+}
+
+static void test_basic_lock_unlock(const char *path) {
+    int fd = open_rw_file(path);
+    CHECK(fd >= 0, "open file for basic flock");
+    if (fd < 0) {
+        return;
+    }
+
+    CHECK(flock(fd, LOCK_EX | LOCK_NB) == 0, "LOCK_EX|LOCK_NB succeeds");
+    CHECK(flock(fd, LOCK_UN) == 0, "LOCK_UN after exclusive succeeds");
+    CHECK(flock(fd, LOCK_SH | LOCK_NB) == 0, "LOCK_SH|LOCK_NB succeeds");
+    CHECK(flock(fd, LOCK_UN) == 0, "LOCK_UN after shared succeeds");
+
+    close(fd);
+}
+
+static void test_nonblocking_conflict(const char *path) {
+    int fd1 = open_rw_file(path);
+    int fd2 = open_rw_file(path);
+    CHECK(fd1 >= 0 && fd2 >= 0, "open two independent fds");
+    if (fd1 < 0 || fd2 < 0) {
+        if (fd1 >= 0)
+            close(fd1);
+        if (fd2 >= 0)
+            close(fd2);
+        return;
+    }
+
+    CHECK(flock(fd1, LOCK_EX | LOCK_NB) == 0, "fd1 takes exclusive lock");
+    errno = 0;
+    CHECK(flock(fd2, LOCK_EX | LOCK_NB) == -1 && is_wouldblock_errno(errno),
+          "fd2 nonblocking exclusive lock conflicts");
+    CHECK(flock(fd1, LOCK_UN) == 0, "fd1 unlock succeeds");
+    CHECK(flock(fd2, LOCK_EX | LOCK_NB) == 0, "fd2 lock succeeds after fd1 unlock");
+    CHECK(flock(fd2, LOCK_UN) == 0, "fd2 unlock succeeds");
+
+    close(fd2);
+    close(fd1);
+}
+
+static void test_dup_unlock_release(const char *path) {
+    int fd = open_rw_file(path);
+    int dupfd = dup(fd);
+    int other = open_rw_file(path);
+    CHECK(fd >= 0 && dupfd >= 0 && other >= 0, "open/dup for dup unlock test");
+    if (fd < 0 || dupfd < 0 || other < 0) {
+        if (fd >= 0)
+            close(fd);
+        if (dupfd >= 0)
+            close(dupfd);
+        if (other >= 0)
+            close(other);
+        return;
+    }
+
+    CHECK(flock(fd, LOCK_EX | LOCK_NB) == 0, "original fd takes exclusive lock");
+    errno = 0;
+    CHECK(flock(other, LOCK_EX | LOCK_NB) == -1 && is_wouldblock_errno(errno),
+          "unrelated fd is blocked by dup-shared lock");
+    CHECK(flock(dupfd, LOCK_UN) == 0, "LOCK_UN via dup fd releases lock");
+    CHECK(flock(other, LOCK_EX | LOCK_NB) == 0, "unrelated fd can lock after dup unlock");
+    CHECK(flock(other, LOCK_UN) == 0, "unrelated fd unlock succeeds");
+
+    close(other);
+    close(dupfd);
+    close(fd);
+}
+
+static void test_dup_last_close_release(const char *path) {
+    int fd = open_rw_file(path);
+    int dupfd = dup(fd);
+    int other = open_rw_file(path);
+    CHECK(fd >= 0 && dupfd >= 0 && other >= 0, "open/dup for last-close release test");
+    if (fd < 0 || dupfd < 0 || other < 0) {
+        if (fd >= 0)
+            close(fd);
+        if (dupfd >= 0)
+            close(dupfd);
+        if (other >= 0)
+            close(other);
+        return;
+    }
+
+    CHECK(flock(fd, LOCK_EX | LOCK_NB) == 0, "original fd takes exclusive lock");
+
+    close(dupfd);
+    errno = 0;
+    CHECK(flock(other, LOCK_EX | LOCK_NB) == -1 && is_wouldblock_errno(errno),
+          "closing one dup fd does not release lock");
+
+    close(fd);
+    CHECK(flock(other, LOCK_EX | LOCK_NB) == 0,
+          "last close of open-file-description releases lock");
+    CHECK(flock(other, LOCK_UN) == 0, "unlock after last-close release succeeds");
+
+    close(other);
+}
+
+static void test_fork_unlock_release(const char *path) {
+    int fd = open_rw_file(path);
+    int other = open_rw_file(path);
+    CHECK(fd >= 0 && other >= 0, "open fds for fork flock test");
+    if (fd < 0 || other < 0) {
+        if (fd >= 0)
+            close(fd);
+        if (other >= 0)
+            close(other);
+        return;
+    }
+
+    CHECK(flock(fd, LOCK_EX | LOCK_NB) == 0, "parent acquires exclusive lock");
+
+    pid_t pid = fork();
+    CHECK(pid >= 0, "fork for flock test");
+    if (pid < 0) {
+        close(other);
+        close(fd);
+        return;
+    }
+
+    if (pid == 0) {
+        int rc = 0;
+
+        errno = 0;
+        if (!(flock(other, LOCK_EX | LOCK_NB) == -1 && is_wouldblock_errno(errno))) {
+            rc = 1;
+        }
+
+        if (rc == 0 && flock(fd, LOCK_UN) != 0) {
+            rc = 1;
+        }
+
+        if (rc == 0 && flock(other, LOCK_EX | LOCK_NB) != 0) {
+            rc = 1;
+        }
+        if (rc == 0 && flock(other, LOCK_UN) != 0) {
+            rc = 1;
+        }
+
+        close(other);
+        close(fd);
+        _exit(rc);
+    }
+
+    int status = 0;
+    CHECK(waitpid(pid, &status, 0) == pid, "wait child for flock fork test");
+    CHECK(WIFEXITED(status) && WEXITSTATUS(status) == 0,
+          "child unlock on inherited fd releases shared lock");
+
+    close(other);
+    close(fd);
+}
+
+static volatile sig_atomic_t g_sigalrm_seen = 0;
+
+static void sigalrm_handler(int sig) {
+    (void)sig;
+    g_sigalrm_seen = 1;
+}
+
+static void test_blocking_interrupted_by_signal(const char *path) {
+    int fd1 = open_rw_file(path);
+    int fd2 = open_rw_file(path);
+    CHECK(fd1 >= 0 && fd2 >= 0, "open two independent fds for EINTR test");
+    if (fd1 < 0 || fd2 < 0) {
+        if (fd1 >= 0)
+            close(fd1);
+        if (fd2 >= 0)
+            close(fd2);
+        return;
+    }
+
+    CHECK(flock(fd1, LOCK_EX | LOCK_NB) == 0, "fd1 takes lock before blocking flock");
+
+    struct sigaction old_act;
+    struct sigaction act;
+    memset(&act, 0, sizeof(act));
+    act.sa_handler = sigalrm_handler;
+    sigemptyset(&act.sa_mask);
+    act.sa_flags = 0;
+    CHECK(sigaction(SIGALRM, &act, &old_act) == 0, "install SIGALRM handler");
+
+    g_sigalrm_seen = 0;
+    alarm(1);
+    errno = 0;
+    CHECK(flock(fd2, LOCK_EX) == -1 && errno == EINTR,
+          "blocking flock interrupted by signal returns EINTR");
+    alarm(0);
+    CHECK(g_sigalrm_seen != 0, "SIGALRM handler executed");
+
+    CHECK(sigaction(SIGALRM, &old_act, NULL) == 0, "restore SIGALRM handler");
+    CHECK(flock(fd1, LOCK_UN) == 0, "fd1 unlock after EINTR test");
+
+    close(fd2);
+    close(fd1);
+}
+
+static void test_opath_ebadf(const char *path) {
+#ifdef O_PATH
+    int fd = open(path, O_RDONLY | O_PATH, 0);
+    CHECK(fd >= 0, "open O_PATH file");
+    if (fd < 0) {
+        return;
+    }
+
+    errno = 0;
+    CHECK(flock(fd, LOCK_EX | LOCK_NB) == -1 && errno == EBADF,
+          "flock on O_PATH fd returns EBADF");
+    close(fd);
+#else
+    (void)path;
+    printf("SKIP: O_PATH is unavailable in headers\n");
+#endif
+}
+
+static void test_pipe_flock(void) {
+    int p[2];
+    int rc = pipe(p);
+    CHECK(rc == 0, "create pipe for flock test");
+    if (rc != 0) {
+        return;
+    }
+
+    CHECK(flock(p[0], LOCK_EX | LOCK_NB) == 0, "pipe read end lock succeeds");
+    errno = 0;
+    CHECK(flock(p[1], LOCK_EX | LOCK_NB) == -1 && is_wouldblock_errno(errno),
+          "pipe write end lock conflicts");
+    CHECK(flock(p[0], LOCK_UN) == 0, "pipe read end unlock succeeds");
+    CHECK(flock(p[1], LOCK_EX | LOCK_NB) == 0, "pipe write end lock succeeds after unlock");
+    CHECK(flock(p[1], LOCK_UN) == 0, "pipe write end unlock succeeds");
+
+    close(p[0]);
+    close(p[1]);
+}
+
+static void test_socket_flock(void) {
+    int sock = socket(AF_UNIX, SOCK_STREAM, 0);
+    CHECK(sock >= 0, "create UNIX socket for flock test");
+    if (sock < 0) {
+        return;
+    }
+
+    CHECK(flock(sock, LOCK_EX | LOCK_NB) == 0, "flock on socket succeeds");
+    CHECK(flock(sock, LOCK_UN) == 0, "unlock socket flock succeeds");
+
+    close(sock);
+}
+
+static void test_blocking_downgrade_wakeup(const char *path) {
+    int fd = open_rw_file(path);
+    CHECK(fd >= 0, "open file for downgrade wakeup test");
+    if (fd < 0)
+        return;
+
+    CHECK(flock(fd, LOCK_EX | LOCK_NB) == 0, "parent acquires LOCK_EX");
+
+    int pipefd[2];
+    CHECK(pipe(pipefd) == 0, "create pipe for downgrade wakeup test");
+
+    pid_t pid = fork();
+    CHECK(pid >= 0, "fork for downgrade wakeup test");
+    if (pid < 0) {
+        close(fd);
+        close(pipefd[0]);
+        close(pipefd[1]);
+        return;
+    }
+
+    if (pid == 0) {
+        close(pipefd[0]);
+        int child_fd = open_rw_file(path);
+        if (child_fd < 0)
+            _exit(1);
+
+        /* This should block until the parent downgrades to LOCK_SH */
+        if (flock(child_fd, LOCK_SH) != 0)
+            _exit(2);
+
+        /* Notify parent that we acquired the lock */
+        char ok = 1;
+        write(pipefd[1], &ok, 1);
+
+        flock(child_fd, LOCK_UN);
+        close(child_fd);
+        close(pipefd[1]);
+        _exit(0);
+    }
+
+    /* Parent: give child time to enter blocking flock */
+    close(pipefd[1]);
+    usleep(200000);
+
+    /* Downgrade from LOCK_EX to LOCK_SH — should wake the child */
+    CHECK(flock(fd, LOCK_SH | LOCK_NB) == 0, "parent downgrades to LOCK_SH");
+
+    /* Wait for child to signal success, with a timeout via alarm */
+    struct sigaction old_act;
+    struct sigaction act;
+    memset(&act, 0, sizeof(act));
+    act.sa_handler = sigalrm_handler;
+    sigemptyset(&act.sa_mask);
+    act.sa_flags = 0;
+    sigaction(SIGALRM, &act, &old_act);
+
+    g_sigalrm_seen = 0;
+    alarm(5);
+
+    char buf = 0;
+    int r = read(pipefd[0], &buf, 1);
+    alarm(0);
+    sigaction(SIGALRM, &old_act, NULL);
+
+    CHECK(r == 1 && buf == 1, "child acquired LOCK_SH after parent downgrade");
+
+    int status = 0;
+    waitpid(pid, &status, 0);
+    CHECK(WIFEXITED(status) && WEXITSTATUS(status) == 0,
+          "child exited successfully in downgrade wakeup test");
+
+    flock(fd, LOCK_UN);
+    close(fd);
+    close(pipefd[0]);
+}
+
+static void test_blocking_upgrade_wakeup(const char *path) {
+    int fd1 = open_rw_file(path);
+    int fd2 = open_rw_file(path);
+    CHECK(fd1 >= 0 && fd2 >= 0, "open fds for upgrade wakeup test");
+    if (fd1 < 0 || fd2 < 0) {
+        if (fd1 >= 0)
+            close(fd1);
+        if (fd2 >= 0)
+            close(fd2);
+        return;
+    }
+
+    CHECK(flock(fd1, LOCK_SH | LOCK_NB) == 0, "fd1 acquires LOCK_SH");
+    CHECK(flock(fd2, LOCK_SH | LOCK_NB) == 0, "fd2 acquires LOCK_SH");
+
+    int pipefd[2];
+    CHECK(pipe(pipefd) == 0, "create pipe for upgrade wakeup test");
+
+    pid_t pid = fork();
+    CHECK(pid >= 0, "fork for upgrade wakeup test");
+    if (pid < 0) {
+        close(fd1);
+        close(fd2);
+        close(pipefd[0]);
+        close(pipefd[1]);
+        return;
+    }
+
+    if (pid == 0) {
+        close(pipefd[0]);
+        /* fork() shares both open file descriptions, and their locks, with the parent.
+         * Only close fd1: LOCK_UN here would drop the very lock the parent still relies on,
+         * while close() keeps it alive because the parent retains its own descriptor. */
+        close(fd1);
+
+        /* Try to upgrade fd2 to LOCK_EX — should block because fd1's LOCK_SH is still held */
+        if (flock(fd2, LOCK_EX) != 0)
+            _exit(2);
+
+        /* Notify parent that we acquired the lock */
+        char ok = 1;
+        write(pipefd[1], &ok, 1);
+
+        flock(fd2, LOCK_UN);
+        close(fd2);
+        close(pipefd[1]);
+        _exit(0);
+    }
+
+    /* Parent: give child time to enter blocking flock */
+    close(pipefd[1]);
+    usleep(200000);
+
+    /* Release parent's LOCK_SH on fd1 — should unblock the child's LOCK_EX on fd2 */
+    CHECK(flock(fd1, LOCK_UN) == 0, "parent releases LOCK_SH on fd1");
+
+    /* Wait for child to signal success, with a timeout via alarm */
+    struct sigaction old_act;
+    struct sigaction act;
+    memset(&act, 0, sizeof(act));
+    act.sa_handler = sigalrm_handler;
+    sigemptyset(&act.sa_mask);
+    act.sa_flags = 0;
+    sigaction(SIGALRM, &act, &old_act);
+
+    g_sigalrm_seen = 0;
+    alarm(5);
+
+    char buf = 0;
+    int r = read(pipefd[0], &buf, 1);
+    alarm(0);
+    sigaction(SIGALRM, &old_act, NULL);
+
+    CHECK(r == 1 && buf == 1, "child acquired LOCK_EX after parent released LOCK_SH");
+
+    int status = 0;
+    waitpid(pid, &status, 0);
+    CHECK(WIFEXITED(status) && WEXITSTATUS(status) == 0,
+          "child exited successfully in upgrade wakeup test");
+
+    flock(fd2, LOCK_UN);
+    close(fd1);
+    close(fd2);
+    close(pipefd[0]);
+}
+
+int main(void) {
+    char path[128];
+    snprintf(path, sizeof(path), "/tmp/test_flock_%d.tmp", getpid());
+
+    int initfd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0644);
+    CHECK(initfd >= 0, "create flock test file");
+    if (initfd >= 0) {
+        close(initfd);
+    }
+
+    test_invalid_commands(path);
+    test_basic_lock_unlock(path);
+    test_nonblocking_conflict(path);
+    test_dup_unlock_release(path);
+    test_dup_last_close_release(path);
+    test_fork_unlock_release(path);
+    test_blocking_interrupted_by_signal(path);
+    test_opath_ebadf(path);
+    test_pipe_flock();
+    test_socket_flock();
+    test_blocking_downgrade_wakeup(path);
+    test_blocking_upgrade_wakeup(path);
+
+    unlink(path);
+
+    printf("test_flock summary: total=%d failed=%d\n", g_total, g_failed);
+    return g_failed == 0 ? 0 : 1;
+}
diff --git a/user/apps/tests/syscall/gvisor/whitelist.txt b/user/apps/tests/syscall/gvisor/whitelist.txt
index 595b65d788..5ab44904b3 100644
--- a/user/apps/tests/syscall/gvisor/whitelist.txt
+++ b/user/apps/tests/syscall/gvisor/whitelist.txt
@@ -39,6 +39,7 @@ pwritev2_test
 utimes_test
 truncate_test
 fadvise_test
+flock_test
 open_test
 open_create_test
 select_test