From 19f9ef3351e71dc50ed7cae6de13f13120043634 Mon Sep 17 00:00:00 2001
From: greatbridf
Date: Tue, 2 Sep 2025 22:45:55 +0800
Subject: [PATCH 01/25] vfs: rework the vfs subsystem with async

Inode and superblock rework:

Remove the old Inode trait, which used to take on too much
responsibility. The new approach uses three traits: InodeOps provides
access to generic inode attributes, while InodeFileOps and InodeDirOps
handle file and directory requests respectively. All three have
async fn trait methods and don't need to be ?Sized.

Then, we implement Inode, InodeFile and InodeDir for implementors of
the three "Ops" traits, erasing their concrete types and providing a
generic dyn interface by wrapping the returned futures in boxes.
Should we provide an io worker, or some io context with an allocator
for futures, to reduce the overhead of io requests? Or is there a
better idea?

For inode usage, we introduce InodeRef and InodeUse: InodeRef is a
simple wrapper around Weak and InodeUse around Arc. Since Arc and Weak
are foreign types we can't define impls on them directly, so the
wrappers let us provide additional helper methods.

After the change, we impose no structural restriction except for the
spinlock-wrapped InodeInfo. The InodeInfo design might need
rethinking, but the current implementation seems fine aside from
taking the lock unnecessarily when accessing some of its fields;
that shouldn't be a big problem and isn't urgent.

Similar changes are made to the superblock traits and types. For
superblock objects, though, we use a SuperBlockComplex struct to store
common fields such as whether the superblock is read-only, its device
id, and so on. The structs also contain a superblock rwsem, but we
haven't decided how to use it (e.g. whether to acquire the lock and
pass it to the inode methods), or even whether it should exist at all.
This needs further thought, so we put it off for now.

Filesystem rework:

Rework tmpfs, fatfs and procfs with the new scheme described above,
leaving the old ext4 unchanged. The current ext4 implementation uses
some "random" library from the "camp"; its code hasn't been fully
reviewed for time reasons but seems rather "problematic". We might
rewrite the whole module later, and the page cache subsystem needs a
full rework as well, so we put that work off too.

Block device and other parts rework:

Wrap PageCacheBackend, MountCreator and BlockRequestQueue with
async_trait to provide dyn-compatible async functions. Dentry walking
functions are also boxed onto the heap since they are recursive. This
shares the problems of the inode traits and is an ugly solution;
further optimization is required.
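Roughly, the shape of the new traits and of the erased dyn interface
(a simplified sketch with std types standing in for the kernel's own,
and an errno-style Result abbreviating KResult; not the exact in-tree
API):

    use std::future::Future;
    use std::pin::Pin;
    use std::sync::{Arc, Weak};

    type KResult<T> = Result<T, i32>; // errno-style, abbreviated

    // Generic inode attributes, shared by files and directories.
    trait InodeOps: Send + Sync {
        fn ino(&self) -> u64;
    }

    // File requests. RPITIT (Rust 1.75+) with `+ Send` so the future
    // can be boxed as `dyn Future + Send` in the blanket impl below.
    trait InodeFileOps: InodeOps {
        fn read<'a>(
            &'a self,
            buf: &'a mut [u8],
            offset: usize,
        ) -> impl Future<Output = KResult<usize>> + Send + 'a;
    }

    // The dyn-compatible facade built on top of the "Ops" traits.
    trait InodeFile: Send + Sync {
        fn ino(&self) -> u64;
        fn read<'a>(
            &'a self,
            buf: &'a mut [u8],
            offset: usize,
        ) -> Pin<Box<dyn Future<Output = KResult<usize>> + Send + 'a>>;
    }

    // Blanket impl: every InodeFileOps implementor becomes usable as
    // a `dyn InodeFile` by wrapping each returned future in a box.
    impl<T: InodeFileOps> InodeFile for T {
        fn ino(&self) -> u64 {
            InodeOps::ino(self)
        }

        fn read<'a>(
            &'a self,
            buf: &'a mut [u8],
            offset: usize,
        ) -> Pin<Box<dyn Future<Output = KResult<usize>> + Send + 'a>> {
            Box::pin(InodeFileOps::read(self, buf, offset))
        }
    }

    // InodeUse / InodeRef are thin wrappers over the erased pointers.
    struct InodeUse(Arc<dyn InodeFile>);
    struct InodeRef(Weak<dyn InodeFile>);

Each erased call costs one box allocation per request; that allocation
is what the io worker / future-allocator question above is about.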
Signed-off-by: greatbridf --- .vscode/tasks.json | 2 +- Cargo.lock | 98 +++++ Cargo.toml | 2 + src/driver/ahci/mod.rs | 63 +-- src/driver/ahci/port.rs | 48 ++- src/driver/ahci/slot.rs | 19 +- src/driver/e1000e.rs | 8 +- src/driver/serial.rs | 6 +- src/driver/virtio/riscv64.rs | 7 +- src/driver/virtio/virtio_blk.rs | 5 +- src/fs/ext4.rs | 35 +- src/fs/fat32.rs | 567 ++++++++++++++++----------- src/fs/fat32/dir.rs | 214 +++++------ src/fs/fat32/file.rs | 46 +-- src/fs/mod.rs | 3 +- src/fs/procfs.rs | 437 +++++++++------------ src/fs/shm.rs | 146 ------- src/fs/tmpfs.rs | 613 ------------------------------ src/fs/tmpfs/dir.rs | 415 ++++++++++++++++++++ src/fs/tmpfs/file.rs | 298 +++++++++++++++ src/fs/tmpfs/mod.rs | 73 ++++ src/kernel/block.rs | 38 +- src/kernel/block/mbr.rs | 2 +- src/kernel/chardev.rs | 15 +- src/kernel/mem.rs | 2 +- src/kernel/mem/mm_list/mapping.rs | 15 +- src/kernel/mem/page_cache.rs | 55 ++- src/kernel/pcie/driver.rs | 9 +- src/kernel/syscall/file_rw.rs | 94 +++-- src/kernel/syscall/mm.rs | 122 +----- src/kernel/syscall/procops.rs | 20 +- src/kernel/task/loader/elf.rs | 34 +- src/kernel/task/loader/mod.rs | 13 +- src/kernel/timer.rs | 2 + src/kernel/vfs/dentry.rs | 295 +++++++------- src/kernel/vfs/dentry/dcache.rs | 29 +- src/kernel/vfs/file/inode_file.rs | 139 ++++--- src/kernel/vfs/filearray.rs | 23 +- src/kernel/vfs/inode.rs | 494 ------------------------ src/kernel/vfs/inode/ino.rs | 31 ++ src/kernel/vfs/inode/inode.rs | 389 +++++++++++++++++++ src/kernel/vfs/inode/mod.rs | 10 + src/kernel/vfs/inode/ops.rs | 18 + src/kernel/vfs/inode/statx.rs | 97 +++++ src/kernel/vfs/mod.rs | 22 +- src/kernel/vfs/mount.rs | 32 +- src/kernel/vfs/superblock.rs | 127 +++++++ src/kernel/vfs/types/device_id.rs | 36 ++ src/kernel/vfs/types/mod.rs | 5 + src/kernel/vfs/types/mode.rs | 169 ++++++++ src/kernel/vfs/vfs.rs | 10 - src/lib.rs | 33 +- src/prelude.rs | 30 +- 53 files changed, 2986 insertions(+), 2529 deletions(-) delete mode 100644 src/fs/shm.rs delete mode 100644 src/fs/tmpfs.rs create mode 100644 src/fs/tmpfs/dir.rs create mode 100644 src/fs/tmpfs/file.rs create mode 100644 src/fs/tmpfs/mod.rs delete mode 100644 src/kernel/vfs/inode.rs create mode 100644 src/kernel/vfs/inode/ino.rs create mode 100644 src/kernel/vfs/inode/inode.rs create mode 100644 src/kernel/vfs/inode/mod.rs create mode 100644 src/kernel/vfs/inode/ops.rs create mode 100644 src/kernel/vfs/inode/statx.rs create mode 100644 src/kernel/vfs/superblock.rs create mode 100644 src/kernel/vfs/types/device_id.rs create mode 100644 src/kernel/vfs/types/mod.rs create mode 100644 src/kernel/vfs/types/mode.rs delete mode 100644 src/kernel/vfs/vfs.rs diff --git a/.vscode/tasks.json b/.vscode/tasks.json index e7a54791..a85ea0cf 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -6,7 +6,7 @@ { "label": "debug run riscv64", "type": "shell", - "command": "make srun ARCH=riscv64", + "command": "make srun ARCH=riscv64 IMG=/Volumes/oscomp/sdcard-rv.img", "isBackground": true, "problemMatcher": [ { diff --git a/Cargo.lock b/Cargo.lock index 59242bbc..32868677 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -28,6 +28,17 @@ dependencies = [ "log", ] +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "atomic_unique_refcell" version = "0.1.0" @@ -144,6 +155,7 @@ dependencies = [ "acpi", "align_ext", 
"another_ext4", + "async-trait", "atomic_unique_refcell", "bitflags", "buddy_allocator", @@ -155,6 +167,7 @@ dependencies = [ "eonix_preempt", "eonix_runtime", "eonix_sync", + "futures", "intrusive-collections 0.9.8", "intrusive_list", "itertools", @@ -265,6 +278,79 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "784a4df722dc6267a04af36895398f59d21d07dce47232adf31ec0ff2fa45e67" +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-core", + "futures-macro", + "futures-sink", + "futures-task", + "pin-project-lite", + "pin-utils", +] + [[package]] name = "gimli" version = "0.32.0" @@ -332,6 +418,18 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + [[package]] name = "pointers" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index e70d8c65..bc7e7b0c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,6 +34,8 @@ another_ext4 = { git = "https://github.com/SMS-Derfflinger/another_ext4", branch stalloc = { version = "0.6.1", default-features = false, features = [ "allocator-api", ] } +async-trait = "0.1.89" +futures = { version = "0.3.31", features = ["alloc", "async-await"], default-features = false } [target.'cfg(any(target_arch 
= "riscv64", target_arch = "loongarch64"))'.dependencies] virtio-drivers = { version = "0.11.0" } diff --git a/src/driver/ahci/mod.rs b/src/driver/ahci/mod.rs index c3b1cfa0..ab405829 100644 --- a/src/driver/ahci/mod.rs +++ b/src/driver/ahci/mod.rs @@ -2,15 +2,16 @@ use crate::{ fs::procfs, io::Buffer as _, kernel::{ - block::{make_device, BlockDevice}, + block::BlockDevice, constants::{EINVAL, EIO}, interrupt::register_irq_handler, pcie::{self, Header, PCIDevice, PCIDriver, PciError}, - task::block_on, + vfs::types::DeviceId, }, prelude::*, }; use alloc::{format, sync::Arc}; +use async_trait::async_trait; use control::AdapterControl; use defs::*; use eonix_mm::address::{AddrOps as _, PAddr}; @@ -108,7 +109,30 @@ impl Device<'_> { } impl Device<'static> { - fn probe_ports(&self) -> KResult<()> { + async fn probe_port(&self, port: Arc>) -> KResult<()> { + port.init().await?; + + { + let port = port.clone(); + let name = format!("ahci-p{}-stats", port.nport); + procfs::populate_root(name.into_bytes().into(), move |buffer| { + port.print_stats(&mut buffer.get_writer()) + }) + .await; + } + + let port = BlockDevice::register_disk( + DeviceId::new(8, port.nport as u16 * 16), + 2147483647, // TODO: get size from device + port, + )?; + + port.partprobe().await?; + + Ok(()) + } + + async fn probe_ports(&self) -> KResult<()> { for nport in self.control.implemented_ports() { let port = Arc::new(AdapterPort::new(self.control_base, nport)); if !port.status_ok() { @@ -116,27 +140,7 @@ impl Device<'static> { } self.ports.lock_irq()[nport as usize] = Some(port.clone()); - if let Err(e) = (|| -> KResult<()> { - port.init()?; - - { - let port = port.clone(); - let name = format!("ahci-p{}-stats", port.nport); - procfs::populate_root(name.into_bytes().into(), move |buffer| { - port.print_stats(&mut buffer.get_writer()) - })?; - } - - let port = BlockDevice::register_disk( - make_device(8, nport * 16), - 2147483647, // TODO: get size from device - port, - )?; - - block_on(port.partprobe())?; - - Ok(()) - })() { + if let Err(e) = self.probe_port(port).await { self.ports.lock_irq()[nport as usize] = None; println_warn!("probe port {nport} failed with {e}"); } @@ -154,6 +158,7 @@ impl AHCIDriver { } } +#[async_trait] impl PCIDriver for AHCIDriver { fn vendor_id(&self) -> u16 { VENDOR_INTEL @@ -163,7 +168,7 @@ impl PCIDriver for AHCIDriver { DEVICE_AHCI } - fn handle_device(&self, pcidev: Arc>) -> Result<(), PciError> { + async fn handle_device(&self, pcidev: Arc>) -> Result<(), PciError> { let Header::Endpoint(header) = pcidev.header else { Err(EINVAL)? 
}; @@ -200,7 +205,7 @@ impl PCIDriver for AHCIDriver { let device_irq = device.clone(); register_irq_handler(irqno as i32, move || device_irq.handle_interrupt())?; - device.probe_ports()?; + device.probe_ports().await?; self.devices.lock().push(device); @@ -208,6 +213,8 @@ impl PCIDriver for AHCIDriver { } } -pub fn register_ahci_driver() { - pcie::register_driver(AHCIDriver::new()).expect("Register ahci driver failed"); +pub async fn register_ahci_driver() { + pcie::register_driver(AHCIDriver::new()) + .await + .expect("Register ahci driver failed"); } diff --git a/src/driver/ahci/port.rs b/src/driver/ahci/port.rs index f558f6e1..77286ec5 100644 --- a/src/driver/ahci/port.rs +++ b/src/driver/ahci/port.rs @@ -9,9 +9,9 @@ use crate::kernel::block::{BlockDeviceRequest, BlockRequestQueue}; use crate::kernel::constants::{EINVAL, EIO}; use crate::kernel::mem::paging::Page; use crate::kernel::mem::AsMemoryBlock as _; -use crate::kernel::task::block_on; use crate::prelude::*; use alloc::collections::vec_deque::VecDeque; +use async_trait::async_trait; use core::pin::pin; use eonix_mm::address::{Addr as _, PAddr}; use eonix_sync::{SpinIrq as _, WaitList}; @@ -145,18 +145,25 @@ impl AdapterPort<'_> { self.sata_status().read_once() & 0xf == 0x3 } - fn get_free_slot(&self) -> u32 { + async fn get_free_slot(&self) -> u32 { loop { - let mut free_list = self.free_list.lock_irq(); - let free_slot = free_list.free.pop_front(); - if let Some(slot) = free_slot { - return slot; - } let mut wait = pin!(self.free_list_wait.prepare_to_wait()); - wait.as_mut().add_to_wait_list(); - drop(free_list); - block_on(wait); + { + let mut free_list = self.free_list.lock_irq(); + + if let Some(slot) = free_list.free.pop_front() { + return slot; + } + + wait.as_mut().add_to_wait_list(); + + if let Some(slot) = free_list.free.pop_front() { + return slot; + } + } + + wait.await; } } @@ -204,11 +211,11 @@ impl AdapterPort<'_> { Ok(()) } - fn send_command(&self, cmd: &impl Command) -> KResult<()> { + async fn send_command(&self, cmd: &impl Command) -> KResult<()> { let mut cmdtable = CommandTable::new(); cmdtable.setup(cmd); - let slot_index = self.get_free_slot(); + let slot_index = self.get_free_slot().await; let slot = &self.slots[slot_index as usize]; slot.prepare_command(&cmdtable, cmd.write()); @@ -222,7 +229,7 @@ impl AdapterPort<'_> { self.stats.inc_cmd_sent(); - if let Err(_) = block_on(slot.wait_finish()) { + if let Err(_) = slot.wait_finish().await { self.stats.inc_cmd_error(); return Err(EIO); }; @@ -231,16 +238,16 @@ impl AdapterPort<'_> { Ok(()) } - fn identify(&self) -> KResult<()> { + async fn identify(&self) -> KResult<()> { let cmd = IdentifyCommand::new(); // TODO: check returned data - self.send_command(&cmd)?; + self.send_command(&cmd).await?; Ok(()) } - pub fn init(&self) -> KResult<()> { + pub async fn init(&self) -> KResult<()> { self.stop_command()?; self.command_list_base() @@ -251,7 +258,7 @@ impl AdapterPort<'_> { self.start_command()?; - match self.identify() { + match self.identify().await { Err(err) => { self.stop_command()?; Err(err) @@ -269,12 +276,13 @@ impl AdapterPort<'_> { } } +#[async_trait] impl BlockRequestQueue for AdapterPort<'_> { fn max_request_pages(&self) -> u64 { 1024 } - fn submit(&self, req: BlockDeviceRequest) -> KResult<()> { + async fn submit<'a>(&'a self, req: BlockDeviceRequest<'a>) -> KResult<()> { match req { BlockDeviceRequest::Read { sector, @@ -287,7 +295,7 @@ impl BlockRequestQueue for AdapterPort<'_> { let command = ReadLBACommand::new(buffer, sector, count as u16)?; 
- self.send_command(&command) + self.send_command(&command).await } BlockDeviceRequest::Write { sector, @@ -300,7 +308,7 @@ impl BlockRequestQueue for AdapterPort<'_> { let command = WriteLBACommand::new(buffer, sector, count as u16)?; - self.send_command(&command) + self.send_command(&command).await } } } diff --git a/src/driver/ahci/slot.rs b/src/driver/ahci/slot.rs index 2198c457..60a66de3 100644 --- a/src/driver/ahci/slot.rs +++ b/src/driver/ahci/slot.rs @@ -71,19 +71,20 @@ impl<'a> CommandSlot<'a> { pub async fn wait_finish(&self) -> KResult<()> { let mut inner = loop { - let inner = self.inner.lock_irq(); - if inner.state != SlotState::Working { - break inner; - } - let mut wait = pin!(self.wait_list.prepare_to_wait()); - wait.as_mut().add_to_wait_list(); - if inner.state != SlotState::Working { - break inner; + { + let inner = self.inner.lock_irq(); + if inner.state != SlotState::Working { + break inner; + } + wait.as_mut().add_to_wait_list(); + + if inner.state != SlotState::Working { + break inner; + } } - drop(inner); wait.await; }; diff --git a/src/driver/e1000e.rs b/src/driver/e1000e.rs index ba31b8b1..f362f477 100644 --- a/src/driver/e1000e.rs +++ b/src/driver/e1000e.rs @@ -8,6 +8,7 @@ use crate::prelude::*; use alloc::boxed::Box; use alloc::sync::Arc; use alloc::vec::Vec; +use async_trait::async_trait; use core::ptr::NonNull; use eonix_hal::fence::memory_barrier; use eonix_mm::address::{Addr, PAddr}; @@ -437,6 +438,7 @@ struct Driver { dev_id: u16, } +#[async_trait] impl PCIDriver for Driver { fn vendor_id(&self) -> u16 { 0x8086 @@ -446,7 +448,7 @@ impl PCIDriver for Driver { self.dev_id } - fn handle_device(&self, device: Arc>) -> Result<(), PciError> { + async fn handle_device(&self, device: Arc>) -> Result<(), PciError> { let Header::Endpoint(header) = device.header else { Err(EINVAL)? 
}; @@ -473,10 +475,10 @@ impl PCIDriver for Driver { } } -pub fn register_e1000e_driver() { +pub async fn register_e1000e_driver() { let dev_ids = [0x100e, 0x10d3, 0x10ea, 0x153a]; for id in dev_ids.into_iter() { - pcie::register_driver(Driver { dev_id: id }).unwrap(); + pcie::register_driver(Driver { dev_id: id }).await.unwrap(); } } diff --git a/src/driver/serial.rs b/src/driver/serial.rs index d69965f4..b634c232 100644 --- a/src/driver/serial.rs +++ b/src/driver/serial.rs @@ -2,8 +2,8 @@ mod io; use crate::{ kernel::{ - block::make_device, console::set_console, constants::EIO, interrupt::register_irq_handler, - CharDevice, CharDeviceType, Terminal, TerminalDevice, + console::set_console, constants::EIO, interrupt::register_irq_handler, + vfs::types::DeviceId, CharDevice, CharDeviceType, Terminal, TerminalDevice, }, prelude::*, }; @@ -167,7 +167,7 @@ impl Serial { eonix_log::set_console(terminal.clone()); CharDevice::register( - make_device(4, 64 + port.id), + DeviceId::new(4, 64 + port.id as u16), port.name.clone(), CharDeviceType::Terminal(terminal), )?; diff --git a/src/driver/virtio/riscv64.rs b/src/driver/virtio/riscv64.rs index 9bdbf6ce..b33e16ac 100644 --- a/src/driver/virtio/riscv64.rs +++ b/src/driver/virtio/riscv64.rs @@ -1,8 +1,5 @@ use super::virtio_blk::HAL; -use crate::kernel::{ - block::{make_device, BlockDevice}, - task::block_on, -}; +use crate::kernel::{block::BlockDevice, task::block_on, vfs::types::DeviceId}; use alloc::{sync::Arc, vec::Vec}; use eonix_hal::arch_exported::fdt::FDT; use eonix_hal::mm::ArchPhysAccess; @@ -43,7 +40,7 @@ pub fn init() { .expect("Failed to initialize VirtIO Block device"); let block_device = BlockDevice::register_disk( - make_device(8, 16 * disk_id), + DeviceId::new(8, 16 * disk_id), 2147483647, Arc::new(Spin::new(block_device)), ) diff --git a/src/driver/virtio/virtio_blk.rs b/src/driver/virtio/virtio_blk.rs index 37e4fe77..86b500b6 100644 --- a/src/driver/virtio/virtio_blk.rs +++ b/src/driver/virtio/virtio_blk.rs @@ -7,6 +7,8 @@ use crate::{ }, prelude::KResult, }; +use alloc::boxed::Box; +use async_trait::async_trait; use eonix_hal::mm::ArchPhysAccess; use eonix_mm::{ address::{Addr, PAddr, PhysAccess}, @@ -74,6 +76,7 @@ unsafe impl Hal for HAL { } } +#[async_trait] impl BlockRequestQueue for Spin> where T: Transport + Send, @@ -82,7 +85,7 @@ where 1024 } - fn submit(&self, req: BlockDeviceRequest) -> KResult<()> { + async fn submit<'a>(&'a self, req: BlockDeviceRequest<'a>) -> KResult<()> { match req { BlockDeviceRequest::Write { sector, diff --git a/src/fs/ext4.rs b/src/fs/ext4.rs index 76ca4a34..121339d3 100644 --- a/src/fs/ext4.rs +++ b/src/fs/ext4.rs @@ -1,5 +1,3 @@ -use core::sync::atomic::{AtomicU32, AtomicU64, Ordering}; - use crate::kernel::mem::{CachePage, CachePageStream, PageCache, PageCacheBackend}; use crate::kernel::task::block_on; use crate::kernel::timer::Ticks; @@ -31,6 +29,8 @@ use alloc::{ use another_ext4::{ Block, BlockDevice as Ext4BlockDeviceTrait, Ext4, FileType, InodeMode, PBlockId, }; +use async_trait::async_trait; +use core::sync::atomic::{AtomicU32, AtomicU64, Ordering}; use eonix_sync::RwLock; pub struct Ext4BlockDevice { @@ -194,7 +194,7 @@ impl Ext4Fs { root_inode.inode.mtime_extra() as _, )), rwsem: RwLock::new(()), - vfs: Arc::downgrade(&ext4fs) as _, + sb: Arc::downgrade(&ext4fs) as _, }, ) }; @@ -290,7 +290,7 @@ impl Inode for FileInode { } fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - let vfs = self.vfs.upgrade().ok_or(EIO)?; + let vfs = self.sb.upgrade().ok_or(EIO)?; 
let ext4fs = vfs.as_any().downcast_ref::().unwrap(); let mut temp_buf = vec![0u8; buffer.total()]; @@ -334,7 +334,7 @@ impl Inode for FileInode { fn write_direct(&self, stream: &mut dyn Stream, offset: usize) -> KResult { //let _lock = Task::block_on(self.rwsem.write()); - let vfs = self.vfs.upgrade().ok_or(EIO)?; + let vfs = self.sb.upgrade().ok_or(EIO)?; let ext4fs = vfs.as_any().downcast_ref::().unwrap(); let mut temp_buf = vec![0u8; 4096]; @@ -363,7 +363,7 @@ impl Inode for FileInode { fn chmod(&self, mode: Mode) -> KResult<()> { let _lock = block_on(self.rwsem.write()); - let vfs = self.vfs.upgrade().ok_or(EIO)?; + let vfs = self.sb.upgrade().ok_or(EIO)?; let ext4fs = vfs.as_any().downcast_ref::().unwrap(); let old_mode = self.mode.load(); let new_mode = old_mode.perm(mode.bits()); @@ -428,7 +428,7 @@ impl DirInode { impl Inode for DirInode { fn lookup(&self, dentry: &Arc) -> KResult>> { - let vfs = self.vfs.upgrade().ok_or(EIO)?; + let vfs = self.sb.upgrade().ok_or(EIO)?; let ext4fs = vfs.as_any().downcast_ref::().unwrap(); let name = dentry.get_name(); @@ -477,7 +477,7 @@ impl Inode for DirInode { ctime: Spin::new(Instant::new(attr.ctime as _, 0)), mtime: Spin::new(Instant::new(attr.mtime as _, 0)), rwsem: RwLock::new(()), - vfs: self.vfs.clone(), + sb: self.sb.clone(), }, ); @@ -489,7 +489,7 @@ impl Inode for DirInode { offset: usize, callback: &mut dyn FnMut(&[u8], Ino) -> KResult>, ) -> KResult { - let vfs = self.vfs.upgrade().ok_or(EIO)?; + let vfs = self.sb.upgrade().ok_or(EIO)?; let ext4fs = vfs.as_any().downcast_ref::().unwrap(); let entries = ext4fs @@ -519,7 +519,7 @@ impl Inode for DirInode { fn creat(&self, at: &Arc, mode: Mode) -> KResult<()> { let _lock = block_on(self.rwsem.write()); - let vfs = self.vfs.upgrade().ok_or(EIO)?; + let vfs = self.sb.upgrade().ok_or(EIO)?; let ext4fs = vfs.as_any().downcast_ref::().unwrap(); let name = at.get_name(); @@ -534,7 +534,7 @@ impl Inode for DirInode { ) .unwrap(); - let file = FileInode::new(new_ino as u64, self.vfs.clone(), mode); + let file = FileInode::new(new_ino as u64, self.sb.clone(), mode); let now = Instant::now(); self.update_child_time(file.as_ref(), now); self.link_file(); @@ -547,7 +547,7 @@ impl Inode for DirInode { fn mkdir(&self, at: &Dentry, mode: Mode) -> KResult<()> { let _lock = block_on(self.rwsem.write()); - let vfs = self.vfs.upgrade().ok_or(EIO)?; + let vfs = self.sb.upgrade().ok_or(EIO)?; let ext4fs = vfs.as_any().downcast_ref::().unwrap(); let name = at.get_name(); @@ -562,7 +562,7 @@ impl Inode for DirInode { ) .unwrap(); - let new_dir = DirInode::new(new_ino as u64, self.vfs.clone(), mode); + let new_dir = DirInode::new(new_ino as u64, self.sb.clone(), mode); let now = Instant::now(); self.update_child_time(new_dir.as_ref(), now); self.link_dir(); @@ -575,7 +575,7 @@ impl Inode for DirInode { fn unlink(&self, at: &Arc) -> KResult<()> { let _dir_lock = block_on(self.rwsem.write()); - let vfs = self.vfs.upgrade().ok_or(EIO)?; + let vfs = self.sb.upgrade().ok_or(EIO)?; let ext4fs = vfs.as_any().downcast_ref::().unwrap(); let file = at.get_inode()?; @@ -602,7 +602,7 @@ impl Inode for DirInode { fn chmod(&self, mode: Mode) -> KResult<()> { let _lock = block_on(self.rwsem.write()); - let vfs = self.vfs.upgrade().ok_or(EIO)?; + let vfs = self.sb.upgrade().ok_or(EIO)?; let ext4fs = vfs.as_any().downcast_ref::().unwrap(); let old_mode = self.mode.load(); let new_mode = old_mode.perm(mode.bits()); @@ -638,7 +638,7 @@ impl Inode for DirInode { // TODO: may need another lock let _lock = 
block_on(self.rwsem.write()); - let vfs = self.vfs.upgrade().ok_or(EIO)?; + let vfs = self.sb.upgrade().ok_or(EIO)?; let ext4fs = vfs.as_any().downcast_ref::().unwrap(); let old_file = old_dentry.get_inode()?; @@ -698,6 +698,7 @@ impl From for Mode { struct Ext4MountCreator; +#[async_trait] impl MountCreator for Ext4MountCreator { fn check_signature(&self, mut first_block: &[u8]) -> KResult { match first_block.split_off(1080..) { @@ -707,7 +708,7 @@ impl MountCreator for Ext4MountCreator { } } - fn create_mount(&self, source: &str, _flags: u64, mp: &Arc) -> KResult { + async fn create_mount(&self, source: &str, _flags: u64, mp: &Arc) -> KResult { let source = source.as_bytes(); let path = Path::new(source)?; diff --git a/src/fs/fat32.rs b/src/fs/fat32.rs index 12eabdd5..1104337d 100644 --- a/src/fs/fat32.rs +++ b/src/fs/fat32.rs @@ -1,41 +1,72 @@ mod dir; mod file; -use crate::io::Stream; -use crate::kernel::constants::EIO; +use core::future::Future; +use core::ops::Deref; + +use alloc::sync::{Arc, Weak}; +use async_trait::async_trait; +use dir::{as_raw_dirents, ParseDirent}; +use eonix_sync::RwLock; +use itertools::Itertools; + +use crate::kernel::constants::{EINVAL, EIO}; use crate::kernel::mem::{AsMemoryBlock, CachePageStream}; -use crate::kernel::task::block_on; -use crate::kernel::vfs::inode::{Mode, WriteOffset}; +use crate::kernel::timer::Instant; +use crate::kernel::vfs::inode::{InodeDirOps, InodeFileOps, InodeInfo, InodeOps, InodeUse}; +use crate::kernel::vfs::types::{DeviceId, Format, Permission}; +use crate::kernel::vfs::{SbRef, SbUse, SuperBlock, SuperBlockInfo}; +use crate::prelude::*; use crate::{ io::{Buffer, ByteBuffer, UninitBuffer}, kernel::{ - block::{make_device, BlockDevice, BlockDeviceRequest}, + block::{BlockDevice, BlockDeviceRequest}, mem::{ paging::Page, - {CachePage, PageCache, PageCacheBackend}, + {CachePage, PageCache, PageCacheBackendOps}, }, vfs::{ dentry::Dentry, - inode::{define_struct_inode, Ino, Inode, InodeData}, + inode::{Ino, Inode}, mount::{register_filesystem, Mount, MountCreator}, - vfs::Vfs, - DevId, }, }, - prelude::*, KResult, }; -use alloc::{ - collections::btree_map::BTreeMap, - sync::{Arc, Weak}, - vec::Vec, -}; -use core::{ops::ControlFlow, sync::atomic::Ordering}; -use dir::Dirs as _; -use eonix_sync::RwLock; -use file::ClusterRead; -type ClusterNo = u32; +#[repr(transparent)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +struct Cluster(u32); + +#[repr(transparent)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +struct RawCluster(pub u32); + +impl RawCluster { + const START: u32 = 2; + const EOC: u32 = 0x0FFF_FFF8; + const INVL: u32 = 0xF000_0000; + + fn parse(self) -> Option { + match self.0 { + ..Self::START | Self::EOC..Self::INVL => None, + Self::INVL.. => { + unreachable!("invalid cluster number: RawCluster({:#08x})", self.0) + } + no => Some(Cluster(no)), + } + } +} + +impl Cluster { + pub fn as_ino(self) -> Ino { + Ino::new(self.0 as _) + } + + fn normalized(self) -> Self { + Self(self.0 - 2) + } +} const SECTOR_SIZE: usize = 512; @@ -59,7 +90,7 @@ struct Bootsector { sectors_per_fat: u32, flags: u16, fat_version: u16, - root_cluster: ClusterNo, + root_cluster: RawCluster, fsinfo_sector: u16, backup_bootsector: u16, _reserved: [u8; 12], @@ -73,222 +104,203 @@ struct Bootsector { mbr_signature: u16, } -impl_any!(FatFs); /// # Lock order /// 2. FatTable /// 3. 
Inodes /// struct FatFs { sectors_per_cluster: u8, - rootdir_cluster: ClusterNo, - data_start: u64, - volume_label: [u8; 11], + data_start_sector: u64, + _rootdir_cluster: Cluster, + _volume_label: Box, device: Arc, - fat: RwLock>, - weak: Weak, - icache: BTreeMap, + fat: RwLock>, } -impl Vfs for FatFs { - fn io_blksize(&self) -> usize { - 4096 - } - - fn fs_devid(&self) -> DevId { - self.device.devid() - } - - fn is_read_only(&self) -> bool { - true - } -} +impl SuperBlock for FatFs {} impl FatFs { - fn read_cluster(&self, cluster: ClusterNo, buf: &Page) -> KResult<()> { - let cluster = cluster - 2; + async fn read_cluster(&self, mut cluster: Cluster, buf: &Page) -> KResult<()> { + cluster = cluster.normalized(); let rq = BlockDeviceRequest::Read { - sector: self.data_start as u64 + cluster as u64 * self.sectors_per_cluster as u64, + sector: self.data_start_sector as u64 + + cluster.0 as u64 * self.sectors_per_cluster as u64, count: self.sectors_per_cluster as u64, buffer: core::slice::from_ref(buf), }; - self.device.commit_request(rq)?; + self.device.commit_request(rq).await?; Ok(()) } - - fn get_or_alloc_inode(&self, ino: Ino, is_directory: bool, size: u32) -> Arc { - self.icache - .get(&ino) - .cloned() - .map(FatInode::unwrap) - .unwrap_or_else(|| { - if is_directory { - DirInode::new(ino, self.weak.clone(), size) - } else { - FileInode::new(ino, self.weak.clone(), size) - } - }) - } } impl FatFs { - pub fn create(device: DevId) -> KResult<(Arc, Arc)> { + pub async fn create(device: DeviceId) -> KResult<(SbUse, InodeUse)> { let device = BlockDevice::get(device)?; - let mut fatfs_arc = Arc::new_cyclic(|weak: &Weak| Self { - device, - sectors_per_cluster: 0, - rootdir_cluster: 0, - data_start: 0, - fat: RwLock::new(Vec::new()), - weak: weak.clone(), - icache: BTreeMap::new(), - volume_label: [0; 11], - }); - let fatfs = unsafe { Arc::get_mut_unchecked(&mut fatfs_arc) }; - - let mut info: UninitBuffer = UninitBuffer::new(); - fatfs.device.read_some(0, &mut info)?.ok_or(EIO)?; + let mut info = UninitBuffer::::new(); + device.read_some(0, &mut info).await?.ok_or(EIO)?; let info = info.assume_filled_ref()?; - fatfs.sectors_per_cluster = info.sectors_per_cluster; - fatfs.rootdir_cluster = info.root_cluster; - fatfs.data_start = - info.reserved_sectors as u64 + info.fat_copies as u64 * info.sectors_per_fat as u64; - - let fat = fatfs.fat.get_mut(); - - fat.resize( - 512 * info.sectors_per_fat as usize / core::mem::size_of::(), - 0, + let mut fat = Box::new_uninit_slice( + 512 * info.sectors_per_fat as usize / core::mem::size_of::(), ); - let mut buffer = ByteBuffer::from(fat.as_mut_slice()); - - fatfs - .device - .read_some(info.reserved_sectors as usize * 512, &mut buffer)? + device + .read_some( + info.reserved_sectors as usize * 512, + &mut ByteBuffer::from(fat.as_mut()), + ) + .await? 
.ok_or(EIO)?; - info.volume_label - .iter() - .take_while(|&&c| c != ' ' as u8) - .take(11) - .enumerate() - .for_each(|(idx, c)| fatfs.volume_label[idx] = *c); + let sectors_per_cluster = info.sectors_per_cluster; + let rootdir_cluster = info.root_cluster.parse().ok_or(EINVAL)?; - let root_dir_cluster_count = ClusterIterator::new(fat, fatfs.rootdir_cluster).count(); - let root_dir_size = root_dir_cluster_count as u32 * info.sectors_per_cluster as u32 * 512; + let data_start_sector = + info.reserved_sectors as u64 + info.fat_copies as u64 * info.sectors_per_fat as u64; + + let volume_label = { + let end = info + .volume_label + .iter() + .position(|&c| c == b' ') + .unwrap_or(info.volume_label.len()); + + String::from_utf8_lossy(&info.volume_label[..end]) + .into_owned() + .into_boxed_str() + }; - let root_inode = DirInode::new( - (info.root_cluster & !0xF000_0000) as Ino, - fatfs.weak.clone(), - root_dir_size, + let fat = unsafe { fat.assume_init() }; + + let rootdir_cluster_count = ClusterIterator::new(fat.as_ref(), rootdir_cluster).count(); + let rootdir_size = rootdir_cluster_count as u32 * sectors_per_cluster as u32 * 512; + + let fatfs = SbUse::new( + SuperBlockInfo { + io_blksize: 4096, + device_id: device.devid(), + read_only: true, + }, + Self { + device, + sectors_per_cluster, + _rootdir_cluster: rootdir_cluster, + data_start_sector, + fat: RwLock::new(fat), + _volume_label: volume_label, + }, ); - Ok((fatfs_arc, root_inode)) + let sbref = SbRef::from(&fatfs); + Ok((fatfs, DirInode::new(rootdir_cluster, sbref, rootdir_size))) } } -struct ClusterIterator<'fat> { - fat: &'fat [ClusterNo], - cur: ClusterNo, +struct ClusterIterator<'a> { + fat: &'a [RawCluster], + cur: Option, } -impl<'fat> ClusterIterator<'fat> { - fn new(fat: &'fat [ClusterNo], start: ClusterNo) -> Self { - Self { fat, cur: start } +impl<'a> ClusterIterator<'a> { + fn new(fat: &'a [RawCluster], start: Cluster) -> Self { + Self { + fat, + cur: Some(start), + } } } impl<'fat> Iterator for ClusterIterator<'fat> { - type Item = ClusterNo; + type Item = Cluster; fn next(&mut self) -> Option { - const EOC: ClusterNo = 0x0FFF_FFF8; - const INVL: ClusterNo = 0xF000_0000; - - match self.cur { - ..2 | EOC..INVL => None, - INVL.. => unreachable!("Invalid cluster number: {}", self.cur), - next => { - self.cur = self.fat[next as usize] & !INVL; - Some(next) - } - } + self.cur.inspect(|&Cluster(no)| { + self.cur = self.fat[no as usize].parse(); + }) } } -#[allow(dead_code)] -#[derive(Clone)] -enum FatInode { - File(Arc), - Dir(Arc), +struct FileInode { + cluster: Cluster, + info: Spin, + sb: SbRef, + page_cache: PageCache, } -impl FatInode { - fn unwrap(self) -> Arc { - match self { - FatInode::File(inode) => inode, - FatInode::Dir(inode) => inode, - } +impl FileInode { + fn new(cluster: Cluster, sb: SbRef, size: u32) -> InodeUse { + InodeUse::new_cyclic(|weak: &Weak| Self { + cluster, + info: Spin::new(InodeInfo { + size: size as u64, + nlink: 1, + uid: 0, + gid: 0, + perm: Permission::new(0o777), + atime: Instant::UNIX_EPOCH, + ctime: Instant::UNIX_EPOCH, + mtime: Instant::UNIX_EPOCH, + }), + sb, + page_cache: PageCache::new(weak.clone()), + }) } } -define_struct_inode! 
{ - struct FileInode { - page_cache: PageCache, +impl InodeOps for FileInode { + type SuperBlock = FatFs; + + fn ino(&self) -> Ino { + self.cluster.as_ino() } -} -impl FileInode { - fn new(ino: Ino, weak: Weak, size: u32) -> Arc { - let inode = Arc::new_cyclic(|weak_self: &Weak| Self { - idata: InodeData::new(ino, weak), - page_cache: PageCache::new(weak_self.clone()), - }); + fn format(&self) -> Format { + Format::REG + } - // Safety: We are initializing the inode - inode.nlink.store(1, Ordering::Relaxed); - inode.mode.store(Mode::REG.perm(0o777)); - inode.size.store(size as u64, Ordering::Relaxed); + fn info(&self) -> &Spin { + &self.info + } - inode + fn super_block(&self) -> &SbRef { + &self.sb } -} -impl Inode for FileInode { fn page_cache(&self) -> Option<&PageCache> { Some(&self.page_cache) } +} - fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - block_on(self.page_cache.read(buffer, offset)) +impl InodeDirOps for FileInode {} +impl InodeFileOps for FileInode { + async fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { + self.page_cache.read(buffer, offset).await } - fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - let vfs = self.vfs.upgrade().ok_or(EIO)?; - let vfs = vfs.as_any().downcast_ref::().unwrap(); - let fat = block_on(vfs.fat.read()); + async fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { + let sb = self.sb.get()?; + let fs = &sb.backend; + let fat = sb.backend.fat.read().await; - if self.size.load(Ordering::Relaxed) as usize == 0 { + if offset >= self.info.lock().size as usize { return Ok(0); } - let cluster_size = vfs.sectors_per_cluster as usize * SECTOR_SIZE; + let cluster_size = fs.sectors_per_cluster as usize * SECTOR_SIZE; assert!(cluster_size <= 0x1000, "Cluster size is too large"); let skip_clusters = offset / cluster_size; let inner_offset = offset % cluster_size; - let cluster_iter = - ClusterIterator::new(fat.as_ref(), self.ino as ClusterNo).skip(skip_clusters); + let cluster_iter = ClusterIterator::new(fat.as_ref(), self.cluster).skip(skip_clusters); let buffer_page = Page::alloc(); for cluster in cluster_iter { - vfs.read_cluster(cluster, &buffer_page)?; + fs.read_cluster(cluster, &buffer_page).await?; let data = unsafe { // SAFETY: We are the only one holding this page. @@ -296,7 +308,7 @@ impl Inode for FileInode { }; let end = offset + data.len(); - let real_end = core::cmp::min(end, self.size.load(Ordering::Relaxed) as usize); + let real_end = end.min(self.info.lock().size as usize); let real_size = real_end - offset; if buffer.fill(&data[..real_size])?.should_stop() { @@ -306,108 +318,203 @@ impl Inode for FileInode { Ok(buffer.wrote()) } +} - fn write(&self, _stream: &mut dyn Stream, _offset: WriteOffset) -> KResult { - todo!() +impl PageCacheBackendOps for FileInode { + async fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult { + self.read_direct(page, offset).await } - fn write_direct(&self, _stream: &mut dyn Stream, _offset: usize) -> KResult { + async fn write_page(&self, _page: &mut CachePageStream, _offset: usize) -> KResult { todo!() } + + fn size(&self) -> usize { + self.info.lock().size as usize + } } -impl PageCacheBackend for FileInode { - fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult { - self.read_direct(page, offset) +struct DirInode { + cluster: Cluster, + info: Spin, + sb: SbRef, + + // TODO: Use the new PageCache... 
+ dir_pages: RwLock>, +} + +impl DirInode { + fn new(cluster: Cluster, sb: SbRef, size: u32) -> InodeUse { + InodeUse::new(Self { + cluster, + info: Spin::new(InodeInfo { + size: size as u64, + nlink: 2, // '.' and '..' + uid: 0, + gid: 0, + perm: Permission::new(0o777), + atime: Instant::UNIX_EPOCH, + ctime: Instant::UNIX_EPOCH, + mtime: Instant::UNIX_EPOCH, + }), + sb, + dir_pages: RwLock::new(Vec::new()), + }) } - fn write_page(&self, _page: &mut CachePageStream, _offset: usize) -> KResult { - todo!() + async fn read_dir_pages(&self) -> KResult<()> { + let mut dir_pages = self.dir_pages.write().await; + if !dir_pages.is_empty() { + return Ok(()); + } + + let sb = self.sb.get()?; + let fs = &sb.backend; + let fat = fs.fat.read().await; + + let clusters = ClusterIterator::new(fat.as_ref(), self.cluster); + + for cluster in clusters { + let page = Page::alloc(); + fs.read_cluster(cluster, &page).await?; + + dir_pages.push(page); + } + + Ok(()) } - fn size(&self) -> usize { - self.size.load(Ordering::Relaxed) as usize + async fn get_dir_pages(&self) -> KResult> + use<'_>> { + { + let dir_pages = self.dir_pages.read().await; + if !dir_pages.is_empty() { + return Ok(dir_pages); + } + } + + self.read_dir_pages().await?; + + if let Some(dir_pages) = self.dir_pages.try_read() { + return Ok(dir_pages); + } + + Ok(self.dir_pages.read().await) } } -define_struct_inode! { - struct DirInode; -} +impl InodeOps for DirInode { + type SuperBlock = FatFs; -impl DirInode { - fn new(ino: Ino, weak: Weak, size: u32) -> Arc { - let inode = Arc::new(Self { - idata: InodeData::new(ino, weak), - }); + fn ino(&self) -> Ino { + self.cluster.as_ino() + } + + fn format(&self) -> Format { + Format::DIR + } - // Safety: We are initializing the inode - inode.nlink.store(2, Ordering::Relaxed); - inode.mode.store(Mode::DIR.perm(0o777)); - inode.size.store(size as u64, Ordering::Relaxed); + fn info(&self) -> &Spin { + &self.info + } - inode + fn super_block(&self) -> &SbRef { + &self.sb + } + + fn page_cache(&self) -> Option<&PageCache> { + None } } -impl Inode for DirInode { - fn lookup(&self, dentry: &Arc) -> KResult>> { - let vfs = self.vfs.upgrade().ok_or(EIO)?; - let vfs = vfs.as_any().downcast_ref::().unwrap(); - let fat = block_on(vfs.fat.read()); - - let mut entries = ClusterIterator::new(fat.as_ref(), self.ino as ClusterNo) - .read(vfs, 0) - .dirs(); - - let entry = entries.find(|entry| { - entry - .as_ref() - .map(|entry| &entry.filename == &***dentry.name()) - .unwrap_or(true) +impl InodeFileOps for DirInode {} +impl InodeDirOps for DirInode { + async fn lookup(&self, dentry: &Arc) -> KResult>> { + let sb = self.sb.get()?; + let dir_pages = self.get_dir_pages().await?; + + let dir_data = dir_pages.iter().map(|page| { + unsafe { + // SAFETY: No one could be writing to it. 
+ page.as_memblk().as_bytes() + } }); - match entry { - None => Ok(None), - Some(Err(err)) => Err(err), - Some(Ok(entry)) => Ok(Some(vfs.get_or_alloc_inode( - entry.cluster as Ino, - entry.is_directory, - entry.size, - ))), + let raw_dirents = dir_data + .map(as_raw_dirents) + .take_while_inclusive(Result::is_ok) + .flatten_ok(); + + let mut dirents = futures::stream::iter(raw_dirents); + + while let Some(result) = dirents.next_dirent().await { + let entry = result?; + + if *entry.filename != ****dentry.name() { + continue; + } + + let sbref = SbRef::from(&sb); + + if entry.is_directory { + return Ok(Some(DirInode::new(entry.cluster, sbref, entry.size) as _)); + } else { + return Ok(Some(FileInode::new(entry.cluster, sbref, entry.size) as _)); + } } + + Ok(None) } - fn do_readdir( - &self, + fn readdir<'r, 'a: 'r, 'b: 'r>( + &'a self, offset: usize, - callback: &mut dyn FnMut(&[u8], Ino) -> KResult>, - ) -> KResult { - let vfs = self.vfs.upgrade().ok_or(EIO)?; - let vfs = vfs.as_any().downcast_ref::().unwrap(); - let fat = block_on(vfs.fat.read()); - - let cluster_iter = ClusterIterator::new(fat.as_ref(), self.ino as ClusterNo) - .read(vfs, offset) - .dirs(); - - let mut nread = 0usize; - for entry in cluster_iter { - let entry = entry?; - - vfs.get_or_alloc_inode(entry.cluster as Ino, entry.is_directory, entry.size); - if callback(&entry.filename, entry.cluster as Ino)?.is_break() { - break; + callback: &'b mut (dyn FnMut(&[u8], Ino) -> KResult + Send), + ) -> impl Future>> + Send + 'r { + async move { + let sb = self.sb.get()?; + let fs = &sb.backend; + let dir_pages = self.get_dir_pages().await?; + + let cluster_size = fs.sectors_per_cluster as usize * SECTOR_SIZE; + + let cluster_offset = offset / cluster_size; + let inner_offset = offset % cluster_size; + let inner_raw_dirent_offset = inner_offset / core::mem::size_of::(); + + let dir_data = dir_pages.iter().skip(cluster_offset).map(|page| { + unsafe { + // SAFETY: No one could be writing to it. + page.as_memblk().as_bytes() + } + }); + + let raw_dirents = dir_data + .map(as_raw_dirents) + .take_while_inclusive(Result::is_ok) + .flatten_ok() + .skip(inner_raw_dirent_offset); + + let mut dirents = futures::stream::iter(raw_dirents); + + let mut nread = 0; + while let Some(result) = dirents.next_dirent().await { + let entry = result?; + + match callback(&entry.filename, entry.cluster.as_ino()) { + Err(err) => return Ok(Err(err)), + Ok(true) => nread += entry.entry_offset as usize, + Ok(false) => break, + } } - nread += entry.entry_offset as usize; + Ok(Ok(nread)) } - - Ok(nread) } } struct FatMountCreator; +#[async_trait] impl MountCreator for FatMountCreator { fn check_signature(&self, mut first_block: &[u8]) -> KResult { match first_block.split_off(82..) 
{ @@ -417,8 +524,8 @@ impl MountCreator for FatMountCreator { } } - fn create_mount(&self, _source: &str, _flags: u64, mp: &Arc) -> KResult { - let (fatfs, root_inode) = FatFs::create(make_device(8, 1))?; + async fn create_mount(&self, _source: &str, _flags: u64, mp: &Arc) -> KResult { + let (fatfs, root_inode) = FatFs::create(DeviceId::new(8, 1)).await?; Mount::new(mp, fatfs, root_inode) } diff --git a/src/fs/fat32/dir.rs b/src/fs/fat32/dir.rs index c4ac6c0d..8a5b6f40 100644 --- a/src/fs/fat32/dir.rs +++ b/src/fs/fat32/dir.rs @@ -1,11 +1,16 @@ -use super::file::ClusterReadIterator; +use core::pin::Pin; + +use alloc::{boxed::Box, string::String}; +use futures::{Stream, StreamExt}; +use posix_types::result::PosixError; + use crate::kernel::constants::EINVAL; use crate::prelude::*; -use alloc::{string::String, sync::Arc}; -use itertools::Itertools; + +use super::{Cluster, RawCluster}; #[repr(C, packed)] -pub(super) struct RawDirEntry { +pub struct RawDirEntry { name: [u8; 8], extension: [u8; 3], attr: u8, @@ -21,9 +26,9 @@ pub(super) struct RawDirEntry { size: u32, } -pub(super) struct FatDirectoryEntry { - pub filename: Arc<[u8]>, - pub cluster: u32, +pub struct FatDirectoryEntry { + pub filename: Box<[u8]>, + pub cluster: Cluster, pub size: u32, pub entry_offset: u32, pub is_directory: bool, @@ -79,7 +84,7 @@ impl RawDirEntry { self.attr & Self::ATTR_DIRECTORY != 0 } - fn long_filename(&self) -> Option<[u16; 13]> { + fn as_raw_long_filename(&self) -> Option<[u16; 13]> { if !self.is_long_filename() { return None; } @@ -103,137 +108,114 @@ impl RawDirEntry { } } -impl<'data, I> RawDirs<'data> for I where I: ClusterReadIterator<'data> {} -trait RawDirs<'data>: ClusterReadIterator<'data> { - fn raw_dirs(self) -> impl Iterator> + 'data - where - Self: Sized, - { - const ENTRY_SIZE: usize = size_of::(); - - self.map(|result| { - let data = result?; - if data.len() % ENTRY_SIZE != 0 { - return Err(EINVAL); - } - - Ok(unsafe { - core::slice::from_raw_parts( - data.as_ptr() as *const RawDirEntry, - data.len() / ENTRY_SIZE, - ) - }) - }) - .flatten_ok() +pub fn as_raw_dirents(data: &[u8]) -> KResult<&[RawDirEntry]> { + let len = data.len(); + if len % size_of::() != 0 { + return Err(EINVAL); } -} - -pub(super) trait Dirs<'data>: ClusterReadIterator<'data> { - fn dirs(self) -> impl Iterator> + 'data - where - Self: Sized; -} -impl<'data, I> Dirs<'data> for I -where - I: ClusterReadIterator<'data>, -{ - fn dirs(self) -> impl Iterator> + 'data - where - Self: Sized, - { - self.raw_dirs().real_dirs() + unsafe { + Ok(core::slice::from_raw_parts( + data.as_ptr() as *const RawDirEntry, + len / size_of::(), + )) } } -trait RealDirs<'data>: Iterator> + 'data { - fn real_dirs(self) -> DirsIter<'data, Self> - where - Self: Sized; +pub trait ParseDirent { + async fn next_dirent(&mut self) -> Option>; } -impl<'data, I> RealDirs<'data> for I +impl<'a, T> ParseDirent for T where - I: Iterator> + 'data, + T: Stream>, { - fn real_dirs(self) -> DirsIter<'data, Self> - where - Self: Sized, - { - DirsIter { iter: self } - } -} + async fn next_dirent(&mut self) -> Option> { + let mut me = unsafe { Pin::new_unchecked(self) }; + + // The long filename entries are stored in reverse order. + // So we reverse all filename segments and then reverse the whole string at the end. 
+ let mut filename_rev = String::new(); + + let mut is_lfn = false; + let mut nr_entry_scanned = 0; + let mut cur_entry; + + loop { + match me.as_mut().next().await { + Some(Err(err)) => return Some(Err(err)), + Some(Ok(ent)) => { + cur_entry = ent; + nr_entry_scanned += 1; + } + None => { + if is_lfn { + // Unterminated long filename entries are invalid. + return Some(Err(PosixError::EINVAL.into())); + } else { + return None; + } + } + }; -pub(super) struct DirsIter<'data, I> -where - I: Iterator> + 'data, -{ - iter: I, -} + if !cur_entry.is_invalid() { + break; + } -impl<'data, I> Iterator for DirsIter<'data, I> -where - I: Iterator> + 'data, -{ - type Item = KResult; - - fn next(&mut self) -> Option { - let mut filename = String::new(); - let mut entry_offset = 0; - let entry = loop { - let entry = match self.iter.next()? { - Ok(entry) => entry, - Err(err) => return Some(Err(err)), - }; - entry_offset += 1; - - let long_filename = entry.long_filename(); - if entry.is_invalid() { - if let Some(long_filename) = long_filename { - let long_filename = long_filename - .iter() - .position(|&ch| ch == 0) - .map(|pos| &long_filename[..pos]) - .unwrap_or(&long_filename); - - filename.extend( - long_filename - .into_iter() - .map(|&ch| char::from_u32(ch as u32).unwrap_or('?')) - .rev(), - ); - } + let Some(raw_long_filename) = cur_entry.as_raw_long_filename() else { continue; - } - break entry; + }; + + // We are processing a long filename entry. + is_lfn = true; + + let real_len = raw_long_filename + .iter() + .position(|&ch| ch == 0) + .unwrap_or(raw_long_filename.len()); + + let name_codes_rev = raw_long_filename.into_iter().take(real_len).rev(); + let name_chars_rev = char::decode_utf16(name_codes_rev).map(|r| r.unwrap_or('?')); + + filename_rev.extend(name_chars_rev); + } + + // From now on, `entry` represents a valid directory entry. 
+ + let raw_cluster = + RawCluster(cur_entry.cluster_low as u32 | ((cur_entry.cluster_high as u32) << 16)); + + let Some(cluster) = raw_cluster.parse() else { + return Some(Err(PosixError::EINVAL.into())); }; - let filename: Arc<[u8]> = if filename.is_empty() { - let mut filename = entry.filename().to_vec(); - let extension = entry.extension(); + let filename; + + if filename_rev.is_empty() { + let mut name = cur_entry.filename().to_vec(); + let extension = cur_entry.extension(); if !extension.is_empty() { - filename.push(b'.'); - filename.extend_from_slice(extension); + name.push(b'.'); + name.extend_from_slice(extension); } - if entry.is_filename_lowercase() { - filename.make_ascii_lowercase(); + if cur_entry.is_filename_lowercase() { + name.make_ascii_lowercase(); } - filename.into() + filename = name.into_boxed_slice(); } else { - let mut bytes = filename.into_bytes(); - bytes.reverse(); - - bytes.into() - }; + let mut name = filename_rev.into_bytes(); + name.reverse(); + filename = name.into_boxed_slice(); + } Some(Ok(FatDirectoryEntry { - size: entry.size, - entry_offset, + size: cur_entry.size, + entry_offset: nr_entry_scanned * size_of::() as u32, filename, - cluster: entry.cluster_low as u32 | (((entry.cluster_high & !0xF000) as u32) << 16), - is_directory: entry.is_directory(), + cluster, + is_directory: cur_entry.is_directory(), })) } } diff --git a/src/fs/fat32/file.rs b/src/fs/fat32/file.rs index db16df50..2df69728 100644 --- a/src/fs/fat32/file.rs +++ b/src/fs/fat32/file.rs @@ -1,40 +1,24 @@ -use super::{ClusterIterator, FatFs}; -use crate::{ - kernel::mem::{AsMemoryBlock as _, Page}, - KResult, -}; - -pub trait ClusterReadIterator<'data>: Iterator> + 'data {} -impl<'a, I> ClusterReadIterator<'a> for I where I: Iterator> + 'a {} +use futures::Stream; -pub(super) trait ClusterRead<'data> { - fn read<'vfs>(self, vfs: &'vfs FatFs, offset: usize) -> impl ClusterReadIterator<'data> - where - Self: Sized, - 'vfs: 'data; -} +use crate::{kernel::mem::Page, prelude::KResult}; -impl<'data, 'fat: 'data> ClusterRead<'data> for ClusterIterator<'fat> { - fn read<'vfs: 'data>(self, vfs: &'vfs FatFs, offset: usize) -> impl ClusterReadIterator<'data> { - const SECTOR_SIZE: usize = 512; +use super::{ClusterIterator, FatFs}; - let cluster_size = vfs.sectors_per_cluster as usize * SECTOR_SIZE; - assert!(cluster_size <= 0x1000, "Cluster size is too large"); +pub trait ReadClusters { + fn read_clusters(self, fs: &FatFs) -> impl Stream> + Send; +} - let skip_clusters = offset / cluster_size; - let mut inner_offset = offset % cluster_size; +impl ReadClusters for ClusterIterator<'_> { + fn read_clusters(self, fs: &FatFs) -> impl Stream> + Send { + futures::stream::unfold(self, move |mut me| async { + let cluster = me.next()?; + let page = Page::alloc(); - // TODO: Use block cache. - let buffer_page = Page::alloc(); + if let Err(err) = fs.read_cluster(cluster, &page).await { + return Some((Err(err), me)); + } - self.skip(skip_clusters).map(move |cluster| { - vfs.read_cluster(cluster, &buffer_page)?; - let data = unsafe { - // SAFETY: No one could be writing to it. - &buffer_page.as_memblk().as_bytes()[inner_offset..] 
- }; - inner_offset = 0; - Ok(data) + Some((Ok(page), me)) }) } } diff --git a/src/fs/mod.rs b/src/fs/mod.rs index 5d9285ec..c59ee801 100644 --- a/src/fs/mod.rs +++ b/src/fs/mod.rs @@ -1,5 +1,4 @@ +// pub mod ext4; pub mod fat32; pub mod procfs; -pub mod shm; pub mod tmpfs; -pub mod ext4; diff --git a/src/fs/procfs.rs b/src/fs/procfs.rs index 2ed24613..57b881df 100644 --- a/src/fs/procfs.rs +++ b/src/fs/procfs.rs @@ -1,325 +1,264 @@ -use crate::kernel::constants::{EACCES, ENOTDIR}; -use crate::kernel::task::block_on; +use crate::kernel::constants::{EACCES, EISDIR, ENOTDIR}; use crate::kernel::timer::Instant; -use crate::kernel::vfs::inode::{AtomicMode, Mode}; +use crate::kernel::vfs::inode::{InodeDirOps, InodeFileOps, InodeInfo, InodeOps, InodeUse}; +use crate::kernel::vfs::types::{DeviceId, Format, Permission}; +use crate::kernel::vfs::{SbRef, SbUse, SuperBlock, SuperBlockInfo}; use crate::{ io::Buffer, kernel::{ mem::paging::PageBuffer, vfs::{ dentry::Dentry, - inode::{define_struct_inode, AtomicIno, Ino, Inode, InodeData}, + inode::{Ino, Inode}, mount::{dump_mounts, register_filesystem, Mount, MountCreator}, - vfs::Vfs, - DevId, }, }, prelude::*, }; -use alloc::sync::{Arc, Weak}; -use core::{ops::ControlFlow, sync::atomic::Ordering}; -use eonix_sync::{AsProof as _, AsProofMut as _, LazyLock, Locked}; -use itertools::Itertools; - -#[allow(dead_code)] -pub trait ProcFsFile: Send + Sync { - fn can_read(&self) -> bool { - false - } - - fn can_write(&self) -> bool { - false - } +use alloc::sync::Arc; +use async_trait::async_trait; +use core::future::Future; +use core::sync::atomic::{AtomicU64, Ordering}; +use eonix_sync::{LazyLock, RwLock}; + +struct Node { + ino: Ino, + sb: SbRef, + info: Spin, + kind: NodeKind, +} - fn read(&self, _buffer: &mut PageBuffer) -> KResult { - Err(EACCES) - } +enum NodeKind { + File(FileInode), + Dir(DirInode), +} - fn write(&self, _buffer: &[u8]) -> KResult { - Err(EACCES) - } +struct FileInode { + read: Option KResult<()> + Send + Sync>>, + write: Option<()>, } -pub enum ProcFsNode { - File(Arc), - Dir(Arc), +struct DirInode { + entries: RwLock, InodeUse)>>, } -impl ProcFsNode { - fn unwrap(&self) -> Arc { - match self { - ProcFsNode::File(inode) => inode.clone(), - ProcFsNode::Dir(inode) => inode.clone(), - } - } +impl InodeOps for Node { + type SuperBlock = ProcFs; fn ino(&self) -> Ino { - match self { - ProcFsNode::File(inode) => inode.ino, - ProcFsNode::Dir(inode) => inode.ino, - } + self.ino } -} -define_struct_inode! 
{ - pub struct FileInode { - file: Box, - } -} - -impl FileInode { - pub fn new(ino: Ino, vfs: Weak, file: Box) -> Arc { - let mut mode = Mode::REG; - if file.can_read() { - mode.set_perm(0o444); - } - if file.can_write() { - mode.set_perm(0o222); + fn format(&self) -> Format { + match &self.kind { + NodeKind::File(_) => Format::REG, + NodeKind::Dir(_) => Format::DIR, } + } - let mut inode = Self { - idata: InodeData::new(ino, vfs), - file, - }; + fn info(&self) -> &Spin { + &self.info + } - inode.idata.mode.store(mode); - inode.idata.nlink.store(1, Ordering::Relaxed); - *inode.ctime.get_mut() = Instant::now(); - *inode.mtime.get_mut() = Instant::now(); - *inode.atime.get_mut() = Instant::now(); + fn super_block(&self) -> &SbRef { + &self.sb + } - Arc::new(inode) + fn page_cache(&self) -> Option<&crate::kernel::mem::PageCache> { + None } } -impl Inode for FileInode { - fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - if !self.file.can_read() { +impl InodeFileOps for Node { + async fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { + let NodeKind::File(file_inode) = &self.kind else { + return Err(EISDIR); + }; + + let Some(read_fn) = &file_inode.read else { return Err(EACCES); - } + }; let mut page_buffer = PageBuffer::new(); - self.file.read(&mut page_buffer)?; + read_fn(&mut page_buffer)?; - let data = page_buffer - .data() - .split_at_checked(offset) - .map(|(_, data)| data); + let Some((_, data)) = page_buffer.data().split_at_checked(offset) else { + return Ok(0); + }; - match data { - None => Ok(0), - Some(data) => Ok(buffer.fill(data)?.allow_partial()), - } + Ok(buffer.fill(data)?.allow_partial()) } } -define_struct_inode! { - pub struct DirInode { - entries: Locked, ProcFsNode)>, ()>, - } -} +impl InodeDirOps for Node { + async fn lookup(&self, dentry: &Arc) -> KResult>> { + let NodeKind::Dir(dir) = &self.kind else { + return Err(ENOTDIR); + }; -impl DirInode { - pub fn new(ino: Ino, vfs: Weak) -> Arc { - Self::new_locked(ino, vfs, |inode, rwsem| unsafe { - addr_of_mut_field!(inode, entries).write(Locked::new(vec![], rwsem)); - addr_of_mut_field!(&mut *inode, mode).write(AtomicMode::from(Mode::DIR.perm(0o755))); - addr_of_mut_field!(&mut *inode, nlink).write(1.into()); - addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now())); - addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now())); - addr_of_mut_field!(&mut *inode, atime).write(Spin::new(Instant::now())); - }) - } -} + let entries = dir.entries.read().await; + + let dent_name = dentry.name(); + for (name, node) in entries.iter() { + if *name == ***dent_name { + return Ok(Some(node.clone() as _)); + } + } -impl Inode for DirInode { - fn lookup(&self, dentry: &Arc) -> KResult>> { - let lock = block_on(self.rwsem.read()); - Ok(self - .entries - .access(lock.prove()) - .iter() - .find_map(|(name, node)| (name == &***dentry.name()).then(|| node.unwrap()))) + Ok(None) } - fn do_readdir( - &self, + fn readdir<'r, 'a: 'r, 'b: 'r>( + &'a self, offset: usize, - callback: &mut dyn FnMut(&[u8], Ino) -> KResult>, - ) -> KResult { - let lock = block_on(self.rwsem.read()); - self.entries - .access(lock.prove()) - .iter() - .skip(offset) - .map(|(name, node)| callback(name.as_ref(), node.ino())) - .take_while(|result| result.map_or(true, |flow| flow.is_continue())) - .take_while_inclusive(|result| result.is_ok()) - .fold_ok(0, |acc, _| acc + 1) + callback: &'b mut (dyn FnMut(&[u8], Ino) -> KResult + Send), + ) -> impl Future>> + Send + 'r { + Box::pin(async move { + let 
+            let NodeKind::Dir(dir) = &self.kind else {
+                return Err(ENOTDIR);
+            };
+
+            let entries = dir.entries.read().await;
+
+            let mut count = 0;
+            for (name, node) in entries.iter().skip(offset) {
+                match callback(name.as_ref(), node.ino) {
+                    Err(err) => return Ok(Err(err)),
+                    Ok(true) => count += 1,
+                    Ok(false) => break,
+                }
+            }
+
+            Ok(Ok(count))
+        })
     }
 }
 
-impl_any!(ProcFs);
-pub struct ProcFs {
-    root_node: Arc<DirInode>,
-    next_ino: AtomicIno,
-}
-
-impl Vfs for ProcFs {
-    fn io_blksize(&self) -> usize {
-        4096
-    }
-
-    fn fs_devid(&self) -> DevId {
-        10
+impl Node {
+    pub fn new_file(
+        ino: Ino,
+        sb: SbRef<ProcFs>,
+        read: impl Fn(&mut PageBuffer) -> KResult<()> + Send + Sync + 'static,
+    ) -> InodeUse {
+        InodeUse::new(Self {
+            ino,
+            sb,
+            info: Spin::new(InodeInfo {
+                size: 0,
+                nlink: 1,
+                uid: 0,
+                gid: 0,
+                perm: Permission::new(0o444),
+                atime: Instant::UNIX_EPOCH,
+                ctime: Instant::UNIX_EPOCH,
+                mtime: Instant::UNIX_EPOCH,
+            }),
+            kind: NodeKind::File(FileInode::new(Box::new(read))),
+        })
     }
 
-    fn is_read_only(&self) -> bool {
-        false
+    fn new_dir(ino: Ino, sb: SbRef<ProcFs>) -> InodeUse {
+        InodeUse::new(Self {
+            ino,
+            sb,
+            info: Spin::new(InodeInfo {
+                size: 0,
+                nlink: 1,
+                uid: 0,
+                gid: 0,
+                perm: Permission::new(0o755),
+                atime: Instant::UNIX_EPOCH,
+                ctime: Instant::UNIX_EPOCH,
+                mtime: Instant::UNIX_EPOCH,
+            }),
+            kind: NodeKind::Dir(DirInode::new()),
+        })
     }
 }
 
-static GLOBAL_PROCFS: LazyLock<Arc<ProcFs>> = LazyLock::new(|| {
-    Arc::new_cyclic(|weak: &Weak<ProcFs>| ProcFs {
-        root_node: DirInode::new(0, weak.clone()),
-        next_ino: AtomicIno::new(1),
-    })
-});
-
-struct ProcFsMountCreator;
-
-#[allow(dead_code)]
-impl ProcFsMountCreator {
-    pub fn get() -> Arc<ProcFs> {
-        GLOBAL_PROCFS.clone()
-    }
-
-    pub fn get_weak() -> Weak<ProcFs> {
-        Arc::downgrade(&GLOBAL_PROCFS)
+impl FileInode {
+    fn new(read: Box<dyn Fn(&mut PageBuffer) -> KResult<()> + Send + Sync>) -> Self {
+        Self {
+            read: Some(read),
+            write: None,
+        }
     }
 }
 
-impl MountCreator for ProcFsMountCreator {
-    fn create_mount(&self, _source: &str, _flags: u64, mp: &Arc<Dentry>) -> KResult<Mount> {
-        let vfs = ProcFsMountCreator::get();
-        let root_inode = vfs.root_node.clone();
-        Mount::new(mp, vfs, root_inode)
-    }
-
-    fn check_signature(&self, _: &[u8]) -> KResult<bool> {
-        Ok(true)
+impl DirInode {
+    pub fn new() -> Self {
+        Self {
+            entries: RwLock::new(vec![]),
+        }
     }
 }
 
-pub fn root() -> ProcFsNode {
-    let vfs = ProcFsMountCreator::get();
-    let root = vfs.root_node.clone();
-
-    ProcFsNode::Dir(root)
+pub struct ProcFs {
+    root: InodeUse,
+    next_ino: AtomicU64,
 }
 
-pub fn creat(
-    parent: &ProcFsNode,
-    name: Arc<[u8]>,
-    file: Box<dyn ProcFsFile>,
-) -> KResult<ProcFsNode> {
-    let parent = match parent {
-        ProcFsNode::File(_) => return Err(ENOTDIR),
-        ProcFsNode::Dir(parent) => parent,
-    };
-
-    let fs = ProcFsMountCreator::get();
-    let ino = fs.next_ino.fetch_add(1, Ordering::Relaxed);
-
-    let inode = FileInode::new(ino, Arc::downgrade(&fs), file);
-
-    {
-        let lock = block_on(parent.idata.rwsem.write());
-        parent
-            .entries
-            .access_mut(lock.prove_mut())
-            .push((name, ProcFsNode::File(inode.clone())));
+impl SuperBlock for ProcFs {}
+impl ProcFs {
+    fn assign_ino(&self) -> Ino {
+        Ino::new(self.next_ino.fetch_add(1, Ordering::Relaxed))
     }
-
-    Ok(ProcFsNode::File(inode))
 }
 
-#[allow(dead_code)]
-pub fn mkdir(parent: &ProcFsNode, name: &[u8]) -> KResult<ProcFsNode> {
-    let parent = match parent {
-        ProcFsNode::File(_) => return Err(ENOTDIR),
-        ProcFsNode::Dir(parent) => parent,
-    };
-
-    let fs = ProcFsMountCreator::get();
-    let ino = fs.next_ino.fetch_add(1, Ordering::Relaxed);
-
-    let inode = DirInode::new(ino, Arc::downgrade(&fs));
+static GLOBAL_PROCFS: LazyLock<SbUse<ProcFs>> = LazyLock::new(|| {
+    SbUse::new_cyclic(
+        SuperBlockInfo {
+            io_blksize: 4096,
+            device_id: DeviceId::new(0, 10),
+            read_only: false,
+        },
+        |sbref| ProcFs {
+            root: Node::new_dir(Ino::new(0), sbref),
+            next_ino: AtomicU64::new(1),
+        },
+    )
+});
 
-    parent
-        .entries
-        .access_mut(block_on(inode.rwsem.write()).prove_mut())
-        .push((Arc::from(name), ProcFsNode::Dir(inode.clone())));
+struct ProcFsMountCreator;
 
-    Ok(ProcFsNode::Dir(inode))
-}
+#[async_trait]
+impl MountCreator for ProcFsMountCreator {
+    async fn create_mount(&self, _source: &str, _flags: u64, mp: &Arc<Dentry>) -> KResult<Mount> {
+        let fs = GLOBAL_PROCFS.clone();
+        let root_inode = fs.backend.root.clone();
 
-struct DumpMountsFile;
-impl ProcFsFile for DumpMountsFile {
-    fn can_read(&self) -> bool {
-        true
+        Mount::new(mp, fs, root_inode)
     }
 
-    fn read(&self, buffer: &mut PageBuffer) -> KResult<usize> {
-        dump_mounts(&mut buffer.get_writer());
-
-        Ok(buffer.data().len())
+    fn check_signature(&self, _: &[u8]) -> KResult<bool> {
+        Ok(true)
     }
 }
 
-pub fn init() {
-    register_filesystem("procfs", Arc::new(ProcFsMountCreator)).unwrap();
-
-    creat(
-        &root(),
-        Arc::from(b"mounts".as_slice()),
-        Box::new(DumpMountsFile),
-    )
-    .unwrap();
-}
-
-pub struct GenericProcFsFile<ReadFn>
+pub async fn populate_root<F>(name: Arc<[u8]>, read_fn: F)
 where
-    ReadFn: Send + Sync + Fn(&mut PageBuffer) -> KResult<()>,
+    F: Send + Sync + Fn(&mut PageBuffer) -> KResult<()> + 'static,
 {
-    read_fn: Option<ReadFn>,
-}
+    let procfs = &GLOBAL_PROCFS.backend;
+    let root = &procfs.root;
 
-impl<ReadFn> ProcFsFile for GenericProcFsFile<ReadFn>
-where
-    ReadFn: Send + Sync + Fn(&mut PageBuffer) -> KResult<()>,
-{
-    fn can_read(&self) -> bool {
-        self.read_fn.is_some()
-    }
+    let NodeKind::Dir(root) = &root.kind else {
+        unreachable!();
+    };
 
-    fn read(&self, buffer: &mut PageBuffer) -> KResult<usize> {
-        self.read_fn.as_ref().ok_or(EACCES)?(buffer).map(|_| buffer.data().len())
-    }
+    let mut entries = root.entries.write().await;
+    entries.push((
+        name.clone(),
+        Node::new_file(procfs.assign_ino(), SbRef::from(&GLOBAL_PROCFS), read_fn),
+    ));
 }
 
-pub fn populate_root<F>(name: Arc<[u8]>, read_fn: F) -> KResult<()>
-where
-    F: Send + Sync + Fn(&mut PageBuffer) -> KResult<()> + 'static,
-{
-    let root = root();
-
-    creat(
-        &root,
-        name,
-        Box::new(GenericProcFsFile {
-            read_fn: Some(read_fn),
-        }),
-    )
-    .map(|_| ())
+pub async fn init() {
+    register_filesystem("procfs", Arc::new(ProcFsMountCreator)).unwrap();
+
+    populate_root(Arc::from(b"mounts".as_slice()), |buffer| {
+        dump_mounts(&mut buffer.get_writer());
+        Ok(())
+    })
+    .await;
 }
diff --git a/src/fs/shm.rs b/src/fs/shm.rs
deleted file mode 100644
index 09d36141..00000000
--- a/src/fs/shm.rs
+++ /dev/null
@@ -1,146 +0,0 @@
-use core::sync::atomic::{AtomicU32, Ordering};
-
-use alloc::{collections::btree_map::BTreeMap, sync::Arc};
-use bitflags::bitflags;
-use eonix_sync::{LazyLock, Mutex};
-
-use crate::{
-    fs::tmpfs::{DirectoryInode, FileInode, TmpFs},
-    kernel::{constants::ENOSPC, vfs::inode::Mode},
-    prelude::KResult,
-};
-
-bitflags! {
-    #[derive(Debug, Clone, Copy)]
-    pub struct ShmFlags: u32 {
-        /// Create a new segment. If this flag is not used, then shmget() will
-        /// find the segment associated with key and check to see if the user
-        /// has permission to access the segment.
-        const IPC_CREAT = 0o1000;
-        /// This flag is used with IPC_CREAT to ensure that this call creates
-        /// the segment. If the segment already exists, the call fails.
- const IPC_EXCL = 0o2000; - - /// Attach the segment for read-only access.If this flag is not specified, - /// the segment is attached for read and write access, and the process - /// must have read and write permission for the segment. - const SHM_RDONLY = 0o10000; - /// round attach address to SHMLBA boundary - const SHM_RND = 0o20000; - /// Allow the contents of the segment to be executed. - const SHM_EXEC = 0o100000; - } -} - -pub const IPC_PRIVATE: usize = 0; - -pub struct ShmManager { - tmpfs: Arc, - root: Arc, - areas: BTreeMap, -} - -#[repr(C)] -#[derive(Default, Clone, Copy, Debug)] -pub struct IpcPerm { - key: i32, - uid: u32, - gid: u32, - cuid: u32, - cgid: u32, - mode: u16, - seq: u16, -} - -#[repr(C)] -#[derive(Debug, Clone, Copy)] -pub struct ShmIdDs { - // Ownership and permissions - pub shm_perm: IpcPerm, - // Size of segment (bytes). In our system, this must be aligned - pub shm_segsz: usize, - // Last attach time - pub shm_atime: usize, - // Last detach time - pub shm_dtime: usize, - // Creation time/time of last modification via shmctl() - pub shm_ctime: usize, - // PID of creator - pub shm_cpid: usize, - // PID of last shmat(2)/shmdt(2) - pub shm_lpid: usize, - // No. of current attaches - pub shm_nattch: usize, -} - -impl ShmIdDs { - fn new(size: usize, pid: u32) -> Self { - Self { - shm_perm: IpcPerm::default(), - shm_segsz: size, - shm_atime: 0, - shm_dtime: 0, - shm_ctime: 0, // Should set instant now - shm_cpid: pid as usize, - shm_lpid: 0, - shm_nattch: 0, - } - } -} - -#[derive(Debug)] -pub struct ShmArea { - pub area: Arc, - pub shmid_ds: ShmIdDs, -} - -// A big lock here to protect the shared memory area. -// Can be improved with finer-grained locking? -pub static SHM_MANAGER: LazyLock> = - LazyLock::new(|| Mutex::new(ShmManager::new())); - -impl ShmManager { - fn new() -> Self { - let (tmpfs, root) = TmpFs::create(false).expect("should create shm_area successfully"); - Self { - tmpfs, - root, - areas: BTreeMap::new(), - } - } - - pub fn create_shared_area(&self, size: usize, pid: u32, mode: Mode) -> ShmArea { - let ino = self.tmpfs.assign_ino(); - let vfs = Arc::downgrade(&self.tmpfs); - ShmArea { - area: FileInode::new(ino, vfs, size, mode), - shmid_ds: ShmIdDs::new(size, pid), - } - } - - pub fn get(&self, shmid: u32) -> Option<&ShmArea> { - self.areas.get(&shmid) - } - - pub fn insert(&mut self, shmid: u32, area: ShmArea) { - self.areas.insert(shmid, area); - } -} - -pub fn gen_shm_id(key: usize) -> KResult { - const SHM_MAGIC: u32 = 114514000; - - static NEXT_SHMID: AtomicU32 = AtomicU32::new(0); - - if key == IPC_PRIVATE { - let shmid = NEXT_SHMID.fetch_add(1, Ordering::Relaxed); - - if shmid >= SHM_MAGIC { - return Err(ENOSPC); - } else { - return Ok(shmid); - } - } - - (key as u32).checked_add(SHM_MAGIC).ok_or(ENOSPC) -} diff --git a/src/fs/tmpfs.rs b/src/fs/tmpfs.rs deleted file mode 100644 index 7a5bd52b..00000000 --- a/src/fs/tmpfs.rs +++ /dev/null @@ -1,613 +0,0 @@ -use crate::io::Stream; -use crate::kernel::constants::{EEXIST, EINVAL, EIO, EISDIR, ENOENT, ENOSYS, ENOTDIR}; -use crate::kernel::mem::{CachePage, CachePageStream, PageCache, PageCacheBackend}; -use crate::kernel::task::block_on; -use crate::kernel::timer::Instant; -use crate::kernel::vfs::inode::RenameData; -use crate::kernel::vfs::inode::{AtomicMode, InodeData}; -use crate::{ - io::Buffer, - kernel::vfs::{ - dentry::{dcache, Dentry}, - inode::{define_struct_inode, AtomicIno, Ino, Inode, Mode, WriteOffset}, - mount::{register_filesystem, Mount, MountCreator, MS_RDONLY}, - vfs::Vfs, - 
DevId, - }, - prelude::*, -}; -use alloc::sync::{Arc, Weak}; -use core::fmt::Debug; -use core::{ops::ControlFlow, sync::atomic::Ordering}; -use eonix_mm::paging::PAGE_SIZE; -use eonix_sync::{AsProof as _, AsProofMut as _, Locked, Mutex, ProofMut}; -use itertools::Itertools; - -fn acquire(vfs: &Weak) -> KResult> { - vfs.upgrade().ok_or(EIO) -} - -fn astmp(vfs: &Arc) -> &TmpFs { - vfs.as_any() - .downcast_ref::() - .expect("corrupted tmpfs data structure") -} - -define_struct_inode! { - struct NodeInode { - devid: DevId, - } -} - -impl NodeInode { - fn new(ino: Ino, vfs: Weak, mode: Mode, devid: DevId) -> Arc { - Self::new_locked(ino, vfs, |inode, _| unsafe { - addr_of_mut_field!(inode, devid).write(devid); - - addr_of_mut_field!(&mut *inode, mode).write(AtomicMode::from(mode)); - addr_of_mut_field!(&mut *inode, nlink).write(1.into()); - addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now())); - addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now())); - addr_of_mut_field!(&mut *inode, atime).write(Spin::new(Instant::now())); - }) - } -} - -impl Inode for NodeInode { - fn devid(&self) -> KResult { - Ok(self.devid) - } -} - -define_struct_inode! { - pub(super) struct DirectoryInode { - entries: Locked, Ino)>, ()>, - } -} - -impl DirectoryInode { - fn new(ino: Ino, vfs: Weak, mode: Mode) -> Arc { - Self::new_locked(ino, vfs, |inode, rwsem| unsafe { - addr_of_mut_field!(inode, entries) - .write(Locked::new(vec![(Arc::from(b".".as_slice()), ino)], rwsem)); - - addr_of_mut_field!(&mut *inode, size).write(1.into()); - addr_of_mut_field!(&mut *inode, mode) - .write(AtomicMode::from(Mode::DIR.perm(mode.non_format_bits()))); - addr_of_mut_field!(&mut *inode, nlink).write(1.into()); // link from `.` to itself - addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now())); - addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now())); - addr_of_mut_field!(&mut *inode, atime).write(Spin::new(Instant::now())); - }) - } - - fn link(&self, name: Arc<[u8]>, file: &dyn Inode, dlock: ProofMut<'_, ()>) { - let now = Instant::now(); - - // SAFETY: Only `unlink` will do something based on `nlink` count - // No need to synchronize here - file.nlink.fetch_add(1, Ordering::Relaxed); - *self.ctime.lock() = now; - - // SAFETY: `rwsem` has done the synchronization - self.size.fetch_add(1, Ordering::Relaxed); - *self.mtime.lock() = now; - - self.entries.access_mut(dlock).push((name, file.ino)); - } - - fn do_unlink( - &self, - file: &Arc, - filename: &[u8], - entries: &mut Vec<(Arc<[u8]>, Ino)>, - now: Instant, - decrease_size: bool, - _dir_lock: ProofMut<()>, - _file_lock: ProofMut<()>, - ) -> KResult<()> { - // SAFETY: `file_lock` has done the synchronization - if file.mode.load().is_dir() { - return Err(EISDIR); - } - - entries.retain(|(name, ino)| *ino != file.ino || name.as_ref() != filename); - - if decrease_size { - // SAFETY: `dir_lock` has done the synchronization - self.size.fetch_sub(1, Ordering::Relaxed); - } - - *self.mtime.lock() = now; - - // The last reference to the inode is held by some dentry - // and will be released when the dentry is released - - // SAFETY: `file_lock` has done the synchronization - file.nlink.fetch_sub(1, Ordering::Relaxed); - *file.ctime.lock() = now; - - Ok(()) - } -} - -impl Inode for DirectoryInode { - fn do_readdir( - &self, - offset: usize, - callback: &mut dyn FnMut(&[u8], Ino) -> KResult>, - ) -> KResult { - let lock = block_on(self.rwsem.read()); - self.entries - .access(lock.prove()) - .iter() - .skip(offset) - 
.map(|(name, ino)| callback(&name, *ino)) - .take_while(|result| result.map_or(true, |flow| flow.is_continue())) - .take_while_inclusive(|result| result.is_ok()) - .fold_ok(0, |acc, _| acc + 1) - } - - fn creat(&self, at: &Arc, mode: Mode) -> KResult<()> { - let vfs = acquire(&self.vfs)?; - let vfs = astmp(&vfs); - - let rwsem = block_on(self.rwsem.write()); - - let ino = vfs.assign_ino(); - let file = FileInode::new(ino, self.vfs.clone(), 0, mode); - - self.link(at.get_name(), file.as_ref(), rwsem.prove_mut()); - at.save_reg(file) - } - - fn mknod(&self, at: &Dentry, mode: Mode, dev: DevId) -> KResult<()> { - if !mode.is_chr() && !mode.is_blk() { - return Err(EINVAL); - } - - let vfs = acquire(&self.vfs)?; - let vfs = astmp(&vfs); - - let rwsem = block_on(self.rwsem.write()); - - let ino = vfs.assign_ino(); - let file = NodeInode::new(ino, self.vfs.clone(), mode, dev); - - self.link(at.get_name(), file.as_ref(), rwsem.prove_mut()); - at.save_reg(file) - } - - fn symlink(&self, at: &Arc, target: &[u8]) -> KResult<()> { - let vfs = acquire(&self.vfs)?; - let vfs = astmp(&vfs); - - let rwsem = block_on(self.rwsem.write()); - - let ino = vfs.assign_ino(); - let file = SymlinkInode::new(ino, self.vfs.clone(), target.into()); - - self.link(at.get_name(), file.as_ref(), rwsem.prove_mut()); - at.save_symlink(file) - } - - fn mkdir(&self, at: &Dentry, mode: Mode) -> KResult<()> { - let vfs = acquire(&self.vfs)?; - let vfs = astmp(&vfs); - - let rwsem = block_on(self.rwsem.write()); - - let ino = vfs.assign_ino(); - let newdir = DirectoryInode::new(ino, self.vfs.clone(), mode); - - self.link(at.get_name(), newdir.as_ref(), rwsem.prove_mut()); - at.save_dir(newdir) - } - - fn unlink(&self, at: &Arc) -> KResult<()> { - let _vfs = acquire(&self.vfs)?; - - let dir_lock = block_on(self.rwsem.write()); - - let file = at.get_inode()?; - let filename = at.get_name(); - let file_lock = block_on(file.rwsem.write()); - - let entries = self.entries.access_mut(dir_lock.prove_mut()); - - self.do_unlink( - &file, - &filename, - entries, - Instant::now(), - true, - dir_lock.prove_mut(), - file_lock.prove_mut(), - )?; - - // Remove the dentry from the dentry cache immediately - // so later lookup will fail with ENOENT - dcache::d_remove(at); - - Ok(()) - } - - fn chmod(&self, mode: Mode) -> KResult<()> { - let _vfs = acquire(&self.vfs)?; - let _lock = block_on(self.rwsem.write()); - - // SAFETY: `rwsem` has done the synchronization - let old = self.mode.load(); - self.mode.store(old.perm(mode.non_format_bits())); - *self.ctime.lock() = Instant::now(); - - Ok(()) - } - - fn rename(&self, rename_data: RenameData) -> KResult<()> { - let RenameData { - old_dentry, - new_dentry, - new_parent, - is_exchange, - no_replace, - vfs, - } = rename_data; - - if is_exchange { - println_warn!("TmpFs does not support exchange rename for now"); - return Err(ENOSYS); - } - - let vfs = vfs - .as_any() - .downcast_ref::() - .expect("vfs must be a TmpFs"); - - let _rename_lock = block_on(vfs.rename_lock.lock()); - - let old_file = old_dentry.get_inode()?; - let new_file = new_dentry.get_inode(); - - if no_replace && new_file.is_ok() { - return Err(EEXIST); - } - - let same_parent = Arc::as_ptr(&new_parent) == &raw const *self; - if same_parent { - // Same directory rename - // Remove from old location and add to new location - let parent_lock = block_on(self.rwsem.write()); - let entries = self.entries.access_mut(parent_lock.prove_mut()); - - fn rename_old( - old_entry: &mut (Arc<[u8]>, Ino), - old_file: &Arc, - new_dentry: &Arc, - 
now: Instant, - ) { - let (name, _) = old_entry; - *name = new_dentry.get_name(); - *old_file.ctime.lock() = now; - } - - let old_ino = old_file.ino; - let new_ino = new_file.as_ref().ok().map(|f| f.ino); - let old_name = old_dentry.get_name(); - let new_name = new_dentry.get_name(); - - // Find the old and new entries in the directory after we've locked the directory. - let indices = - entries - .iter() - .enumerate() - .fold([None, None], |[old, new], (idx, (name, ino))| { - if Some(*ino) == new_ino && *name == new_name { - [old, Some(idx)] - } else if *ino == old_ino && *name == old_name { - [Some(idx), new] - } else { - [old, new] - } - }); - - let (old_entry_idx, new_entry_idx) = match indices { - [None, ..] => return Err(ENOENT), - [Some(old_idx), new_idx] => (old_idx, new_idx), - }; - - let now = Instant::now(); - - if let Some(new_idx) = new_entry_idx { - // Replace existing file (i.e. rename the old and unlink the new) - let new_file = new_file.unwrap(); - let _new_file_lock = block_on(new_file.rwsem.write()); - - // SAFETY: `new_file_lock` has done the synchronization - match (new_file.mode.load(), old_file.mode.load()) { - (Mode::DIR, _) => return Err(EISDIR), - (_, Mode::DIR) => return Err(ENOTDIR), - _ => {} - } - - entries.remove(new_idx); - - // SAFETY: `parent_lock` has done the synchronization - self.size.fetch_sub(1, Ordering::Relaxed); - - // The last reference to the inode is held by some dentry - // and will be released when the dentry is released - - // SAFETY: `new_file_lock` has done the synchronization - new_file.nlink.fetch_sub(1, Ordering::Relaxed); - *new_file.ctime.lock() = now; - } - - rename_old(&mut entries[old_entry_idx], &old_file, new_dentry, now); - *self.mtime.lock() = now; - } else { - // Cross-directory rename - handle similar to same directory case - - // Get new parent directory - let new_parent_inode = new_dentry.parent().get_inode()?; - assert!(new_parent_inode.is_dir()); - let new_parent = (new_parent_inode.as_ref() as &dyn Any) - .downcast_ref::() - .expect("new parent must be a DirectoryInode"); - - let old_parent_lock = block_on(self.rwsem.write()); - let new_parent_lock = block_on(new_parent_inode.rwsem.write()); - - let old_ino = old_file.ino; - let new_ino = new_file.as_ref().ok().map(|f| f.ino); - let old_name = old_dentry.get_name(); - let new_name = new_dentry.get_name(); - - // Find the old entry in the old directory - let old_entries = self.entries.access_mut(old_parent_lock.prove_mut()); - let old_pos = old_entries - .iter() - .position(|(name, ino)| *ino == old_ino && *name == old_name) - .ok_or(ENOENT)?; - - // Find the new entry in the new directory (if it exists) - let new_entries = new_parent.entries.access_mut(new_parent_lock.prove_mut()); - let has_new = new_entries - .iter() - .position(|(name, ino)| Some(*ino) == new_ino && *name == new_name) - .is_some(); - - let now = Instant::now(); - - if has_new { - // Replace existing file (i.e. 
move the old and unlink the new) - let new_file = new_file.unwrap(); - let new_file_lock = block_on(new_file.rwsem.write()); - - match (old_file.mode.load(), new_file.mode.load()) { - (Mode::DIR, Mode::DIR) => {} - (Mode::DIR, _) => return Err(ENOTDIR), - (_, _) => {} - } - - // Unlink the old file that was replaced - new_parent.do_unlink( - &new_file, - &new_name, - new_entries, - now, - false, - new_parent_lock.prove_mut(), - new_file_lock.prove_mut(), - )?; - } else { - new_parent.size.fetch_add(1, Ordering::Relaxed); - } - - // Remove from old directory - old_entries.remove(old_pos); - - // Add new entry - new_entries.push((new_name, old_ino)); - - self.size.fetch_sub(1, Ordering::Relaxed); - *self.mtime.lock() = now; - *old_file.ctime.lock() = now; - } - - block_on(dcache::d_exchange(old_dentry, new_dentry)); - - Ok(()) - } -} - -define_struct_inode! { - struct SymlinkInode { - target: Arc<[u8]>, - } -} - -impl SymlinkInode { - fn new(ino: Ino, vfs: Weak, target: Arc<[u8]>) -> Arc { - Self::new_locked(ino, vfs, |inode, _| unsafe { - let len = target.len(); - addr_of_mut_field!(inode, target).write(target); - - addr_of_mut_field!(&mut *inode, mode).write(AtomicMode::from(Mode::LNK.perm(0o777))); - addr_of_mut_field!(&mut *inode, size).write((len as u64).into()); - addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now())); - addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now())); - addr_of_mut_field!(&mut *inode, atime).write(Spin::new(Instant::now())); - }) - } -} - -impl Inode for SymlinkInode { - fn readlink(&self, buffer: &mut dyn Buffer) -> KResult { - buffer - .fill(self.target.as_ref()) - .map(|result| result.allow_partial()) - } - - fn chmod(&self, _: Mode) -> KResult<()> { - Ok(()) - } -} - -define_struct_inode! 
{ - pub struct FileInode { - pages: PageCache, - } -} - -impl Debug for FileInode { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, "FileInode({:?})", self.idata) - } -} - -impl FileInode { - pub fn new(ino: Ino, vfs: Weak, size: usize, mode: Mode) -> Arc { - let inode = Arc::new_cyclic(|weak_self: &Weak| FileInode { - idata: InodeData::new(ino, vfs), - pages: PageCache::new(weak_self.clone()), - }); - - inode.mode.store(Mode::REG.perm(mode.non_format_bits())); - inode.nlink.store(1, Ordering::Relaxed); - inode.size.store(size as u64, Ordering::Relaxed); - inode - } -} - -impl PageCacheBackend for FileInode { - fn read_page(&self, _cache_page: &mut CachePage, _offset: usize) -> KResult { - Ok(PAGE_SIZE) - } - - fn write_page(&self, _page: &mut CachePageStream, _offset: usize) -> KResult { - Ok(PAGE_SIZE) - } - - fn size(&self) -> usize { - self.size.load(Ordering::Relaxed) as usize - } -} - -impl Inode for FileInode { - fn page_cache(&self) -> Option<&PageCache> { - Some(&self.pages) - } - - fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - let _lock = block_on(self.rwsem.write()); - block_on(self.pages.read(buffer, offset)) - } - - fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { - // TODO: We don't need that strong guarantee, find some way to avoid locks - let _lock = block_on(self.rwsem.write()); - - let mut store_new_end = None; - let offset = match offset { - WriteOffset::Position(offset) => offset, - WriteOffset::End(end) => { - store_new_end = Some(end); - - // SAFETY: `lock` has done the synchronization - self.size.load(Ordering::Relaxed) as usize - } - }; - - let wrote = block_on(self.pages.write(stream, offset))?; - let cursor_end = offset + wrote; - - if let Some(store_end) = store_new_end { - *store_end = cursor_end; - } - - // SAFETY: `lock` has done the synchronization - *self.mtime.lock() = Instant::now(); - self.size.store(cursor_end as u64, Ordering::Relaxed); - - Ok(wrote) - } - - fn truncate(&self, length: usize) -> KResult<()> { - let _lock = block_on(self.rwsem.write()); - block_on(self.pages.resize(length))?; - self.size.store(length as u64, Ordering::Relaxed); - *self.mtime.lock() = Instant::now(); - Ok(()) - } - - fn chmod(&self, mode: Mode) -> KResult<()> { - let _vfs = acquire(&self.vfs)?; - let _lock = block_on(self.rwsem.write()); - - // SAFETY: `rwsem` has done the synchronization - let old = self.mode.load(); - self.mode.store(old.perm(mode.non_format_bits())); - *self.ctime.lock() = Instant::now(); - - Ok(()) - } -} - -impl_any!(TmpFs); -pub(super) struct TmpFs { - next_ino: AtomicIno, - readonly: bool, - rename_lock: Mutex<()>, -} - -impl Vfs for TmpFs { - fn io_blksize(&self) -> usize { - 4096 - } - - fn fs_devid(&self) -> DevId { - 2 - } - - fn is_read_only(&self) -> bool { - self.readonly - } -} - -impl TmpFs { - pub(super) fn assign_ino(&self) -> Ino { - self.next_ino.fetch_add(1, Ordering::AcqRel) - } - - pub fn create(readonly: bool) -> KResult<(Arc, Arc)> { - let tmpfs = Arc::new(Self { - next_ino: AtomicIno::new(1), - readonly, - rename_lock: Mutex::new(()), - }); - - let weak = Arc::downgrade(&tmpfs); - let root_dir = DirectoryInode::new(0, weak, Mode::new(0o755)); - - Ok((tmpfs, root_dir)) - } -} - -struct TmpFsMountCreator; - -impl MountCreator for TmpFsMountCreator { - fn create_mount(&self, _source: &str, flags: u64, mp: &Arc) -> KResult { - let (fs, root_inode) = TmpFs::create(flags & MS_RDONLY != 0)?; - - Mount::new(mp, fs, root_inode) - } - - fn 
check_signature(&self, _: &[u8]) -> KResult { - Ok(true) - } -} - -pub fn init() { - register_filesystem("tmpfs", Arc::new(TmpFsMountCreator)).unwrap(); -} diff --git a/src/fs/tmpfs/dir.rs b/src/fs/tmpfs/dir.rs new file mode 100644 index 00000000..e2be1d12 --- /dev/null +++ b/src/fs/tmpfs/dir.rs @@ -0,0 +1,415 @@ +use core::{any::Any, future::Future}; + +use alloc::{boxed::Box, sync::Arc, vec, vec::Vec}; +use eonix_log::println_warn; +use eonix_sync::{LazyLock, RwLock, Spin}; + +use crate::{ + kernel::{ + constants::{EEXIST, EINVAL, EISDIR, ENOENT, ENOSYS, ENOTDIR}, + mem::PageCache, + timer::Instant, + vfs::{ + dentry::{dcache, Dentry}, + inode::{ + Ino, Inode, InodeDirOps, InodeFileOps, InodeInfo, InodeOps, InodeUse, RenameData, + }, + types::{DeviceId, Format, Mode, Permission}, + SbRef, + }, + }, + prelude::KResult, +}; + +use super::{ + file::{DeviceInode, FileInode, SymlinkInode}, + TmpFs, +}; + +pub struct DirectoryInode { + sb: SbRef, + ino: Ino, + info: Spin, + entries: RwLock, Ino)>>, +} + +impl InodeOps for DirectoryInode { + type SuperBlock = TmpFs; + + fn ino(&self) -> Ino { + self.ino + } + + fn format(&self) -> Format { + Format::DIR + } + + fn info(&self) -> &Spin { + &self.info + } + + fn super_block(&self) -> &SbRef { + &self.sb + } + + fn page_cache(&self) -> Option<&PageCache> { + None + } +} + +impl DirectoryInode { + pub fn new(ino: Ino, sb: SbRef, perm: Permission) -> InodeUse { + static DOT: LazyLock> = LazyLock::new(|| Arc::from(b".".as_slice())); + + let now = Instant::now(); + + InodeUse::new(Self { + sb, + ino, + info: Spin::new(InodeInfo { + size: 1, + nlink: 1, // link from `.` to itself + perm, + ctime: now, + mtime: now, + atime: now, + uid: 0, + gid: 0, + }), + entries: RwLock::new(vec![(DOT.clone(), ino)]), + }) + } + + fn link( + &self, + entries: &mut Vec<(Arc<[u8]>, Ino)>, + name: Arc<[u8]>, + file: &InodeUse, + ) { + let mut self_info = self.info.lock(); + let mut file_info = file.info().lock(); + + let now = Instant::now(); + + file_info.nlink += 1; + file_info.ctime = now; + + self_info.size += 1; + self_info.mtime = now; + self_info.ctime = now; + + entries.push((name, file.ino())); + } + + fn do_unlink( + &self, + file: &InodeUse, + filename: &[u8], + entries: &mut Vec<(Arc<[u8]>, Ino)>, + now: Instant, + decrease_size: bool, + self_info: &mut InodeInfo, + file_info: &mut InodeInfo, + ) -> KResult<()> { + // SAFETY: `file_lock` has done the synchronization + if file.format() == Format::DIR { + return Err(EISDIR); + } + + let file_ino = file.ino(); + entries.retain(|(name, ino)| *ino != file_ino || name.as_ref() != filename); + + if decrease_size { + self_info.size -= 1; + } + + self_info.mtime = now; + self_info.ctime = now; + + // The last reference to the inode is held by some dentry + // and will be released when the dentry is released + + file_info.nlink -= 1; + file_info.ctime = now; + + // TODO!!!: Remove the file if nlink == 1 + + Ok(()) + } +} + +impl InodeDirOps for DirectoryInode { + fn readdir<'r, 'a: 'r, 'b: 'r>( + &'a self, + offset: usize, + for_each_entry: &'b mut (dyn FnMut(&[u8], Ino) -> KResult + Send), + ) -> impl Future>> + Send + 'r { + Box::pin(async move { + let _sb = self.sb.get()?; + let entries = self.entries.read().await; + + let mut count = 0; + for entry in entries.iter().skip(offset) { + match for_each_entry(&entry.0, entry.1) { + Err(err) => return Ok(Err(err)), + Ok(false) => break, + Ok(true) => count += 1, + } + } + + Ok(Ok(count)) + }) + } + + async fn create(&self, at: &Arc, perm: Permission) -> KResult<()> { + 
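+        // Allocate a fresh inode number from the superblock, build the backing
+        // file inode, link it into this directory's entry list, then publish
+        // the new inode on the dentry.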
let sb = self.sb.get()?; + let mut entries = self.entries.write().await; + + let ino = sb.backend.assign_ino(); + let file: InodeUse = FileInode::new(ino, self.sb.clone(), 0, perm); + + self.link(&mut entries, at.get_name(), &file); + at.fill(file); + + Ok(()) + } + + async fn mknod(&self, at: &Dentry, mode: Mode, dev: DeviceId) -> KResult<()> { + if !mode.is_chr() && !mode.is_blk() { + return Err(EINVAL); + } + + let sb = self.sb.get()?; + let mut entries = self.entries.write().await; + + let ino = sb.backend.assign_ino(); + let file: InodeUse = DeviceInode::new(ino, self.sb.clone(), mode, dev); + + self.link(&mut entries, at.get_name(), &file); + at.fill(file); + + Ok(()) + } + + async fn symlink(&self, at: &Arc, target: &[u8]) -> KResult<()> { + let sb = self.sb.get()?; + let mut entries = self.entries.write().await; + + let ino = sb.backend.assign_ino(); + let file: InodeUse = SymlinkInode::new(ino, self.sb.clone(), target.into()); + + self.link(&mut entries, at.get_name(), &file); + at.fill(file); + + Ok(()) + } + + async fn mkdir(&self, at: &Dentry, perm: Permission) -> KResult<()> { + let sb = self.sb.get()?; + let mut entries = self.entries.write().await; + + let ino = sb.backend.assign_ino(); + let new_dir: InodeUse = DirectoryInode::new(ino, self.sb.clone(), perm); + + self.link(&mut entries, at.get_name(), &new_dir); + at.fill(new_dir); + + Ok(()) + } + + async fn unlink(&self, at: &Arc) -> KResult<()> { + let _sb = self.sb.get()?; + let mut entries = self.entries.write().await; + + let file = at.get_inode()?; + let filename = at.get_name(); + + self.do_unlink( + &file, + &filename, + &mut entries, + Instant::now(), + true, + &mut self.info.lock(), + &mut file.info().lock(), + )?; + + // Remove the dentry from the dentry cache immediately + // so later lookup will fail with ENOENT + dcache::d_remove(at); + + Ok(()) + } + + async fn rename(&self, rename_data: RenameData<'_, '_>) -> KResult<()> { + let sb = self.sb.get()?; + let _rename_lock = sb.backend.rename_lock.lock().await; + let mut self_entries = self.entries.write().await; + + let RenameData { + old_dentry, + new_dentry, + new_parent, + is_exchange, + no_replace, + } = rename_data; + + if is_exchange { + println_warn!("TmpFs does not support exchange rename for now"); + return Err(ENOSYS); + } + + let old_file = old_dentry.get_inode()?; + let new_file = new_dentry.inode(); + + if no_replace && new_file.is_some() { + return Err(EEXIST); + } + + if new_parent.as_raw() == &raw const *self { + // Same directory rename + // Remove from old location and add to new location + let old_ino = old_file.ino(); + let new_ino = new_file.as_ref().map(|f| f.ino()); + let old_name = old_dentry.get_name(); + let new_name = new_dentry.get_name(); + + // Find the old and new entries in the directory after we've locked the directory. + let (mut old_ent_idx, mut new_ent_idx) = (None, None); + for (idx, (name, ino)) in self_entries.iter().enumerate() { + if *ino == old_ino && *name == old_name { + old_ent_idx = Some(idx); + } + + if Some(*ino) == new_ino && *name == new_name { + new_ent_idx = Some(idx); + } + } + + let Some(old_ent_idx) = old_ent_idx else { + return Err(ENOENT); + }; + + if Some(old_ent_idx) == new_ent_idx { + return Ok(()); + } + + let now = Instant::now(); + if let Some(new_idx) = new_ent_idx { + // Replace existing file (i.e. 
rename the old and unlink the new) + let new_file = new_file.unwrap(); + + match (new_file.format(), old_file.format()) { + (Format::DIR, _) => return Err(EISDIR), + (_, Format::DIR) => return Err(ENOTDIR), + _ => {} + } + + self_entries.remove(new_idx); + + self.info.lock().size -= 1; + + // The last reference to the inode is held by some dentry + // and will be released when the dentry is released + + let mut new_info = new_file.info().lock(); + + new_info.nlink -= 1; + new_info.mtime = now; + new_info.ctime = now; + } + + let (name, _) = &mut self_entries[old_ent_idx]; + *name = new_dentry.get_name(); + + let mut self_info = self.info.lock(); + self_info.mtime = now; + self_info.ctime = now; + } else { + // Cross-directory rename - handle similar to same directory case + + // Get new parent directory + let new_parent_inode = new_dentry.parent().get_inode()?; + assert_eq!(new_parent_inode.format(), Format::DIR); + + let new_parent = (&new_parent_inode as &dyn Any) + .downcast_ref::() + .expect("new parent must be a DirectoryInode"); + + let mut new_entries = new_parent.entries.write().await; + + let old_ino = old_file.ino(); + let new_ino = new_file.as_ref().map(|f| f.ino()); + let old_name = old_dentry.get_name(); + let new_name = new_dentry.get_name(); + + // Find the old entry in the old directory + let old_pos = self_entries + .iter() + .position(|(name, ino)| *ino == old_ino && *name == old_name) + .ok_or(ENOENT)?; + + // Find the new entry in the new directory (if it exists) + let has_new = new_entries + .iter() + .position(|(name, ino)| Some(*ino) == new_ino && *name == new_name) + .is_some(); + + let now = Instant::now(); + + if has_new { + // Replace existing file (i.e. move the old and unlink the new) + let new_file = new_file.unwrap(); + + match (old_file.format(), new_file.format()) { + (Format::DIR, Format::DIR) => {} + (Format::DIR, _) => return Err(ENOTDIR), + (_, _) => {} + } + + // Unlink the old file that was replaced + new_parent.do_unlink( + &new_file, + &new_name, + &mut new_entries, + now, + false, + &mut new_parent.info.lock(), + &mut new_file.info().lock(), + )?; + } else { + new_parent.info.lock().size += 1; + new_parent.info.lock().mtime = now; + new_parent.info.lock().ctime = now; + } + + // Remove from old directory + self_entries.remove(old_pos); + + // Add new entry + new_entries.push((new_name, old_ino)); + + let mut self_info = self.info.lock(); + self_info.size -= 1; + self_info.mtime = now; + self_info.ctime = now; + } + + dcache::d_exchange(old_dentry, new_dentry).await; + Ok(()) + } +} + +impl InodeFileOps for DirectoryInode { + async fn chmod(&self, perm: Permission) -> KResult<()> { + let _sb = self.sb.get()?; + + { + let mut info = self.info.lock(); + info.perm = perm; + info.ctime = Instant::now(); + } + + Ok(()) + } +} diff --git a/src/fs/tmpfs/file.rs b/src/fs/tmpfs/file.rs new file mode 100644 index 00000000..624112e0 --- /dev/null +++ b/src/fs/tmpfs/file.rs @@ -0,0 +1,298 @@ +use alloc::sync::Arc; +use eonix_mm::paging::PAGE_SIZE; +use eonix_sync::{RwLock, Spin}; + +use crate::{ + io::{Buffer, Stream}, + kernel::{ + mem::{CachePage, CachePageStream, PageCache, PageCacheBackendOps}, + timer::Instant, + vfs::{ + inode::{Ino, InodeDirOps, InodeFileOps, InodeInfo, InodeOps, InodeUse, WriteOffset}, + types::{DeviceId, Format, Mode, Permission}, + SbRef, + }, + }, + prelude::KResult, +}; + +use super::TmpFs; + +pub struct FileInode { + sb: SbRef, + ino: Ino, + info: Spin, + rwsem: RwLock<()>, + pages: PageCache, +} + +impl FileInode { + pub fn 
new(ino: Ino, sb: SbRef, size: usize, perm: Permission) -> InodeUse { + let now = Instant::now(); + + InodeUse::new_cyclic(|weak| Self { + sb, + ino, + info: Spin::new(InodeInfo { + size: size as _, + nlink: 1, + uid: 0, + gid: 0, + perm, + atime: now, + ctime: now, + mtime: now, + }), + rwsem: RwLock::new(()), + pages: PageCache::new(weak.clone() as _), + }) + } +} + +impl PageCacheBackendOps for FileInode { + async fn read_page(&self, _cache_page: &mut CachePage, _offset: usize) -> KResult { + Ok(PAGE_SIZE) + } + + async fn write_page(&self, _page: &mut CachePageStream, _offset: usize) -> KResult { + Ok(PAGE_SIZE) + } + + fn size(&self) -> usize { + self.info.lock().size as usize + } +} + +impl InodeOps for FileInode { + type SuperBlock = TmpFs; + + fn ino(&self) -> Ino { + self.ino + } + + fn format(&self) -> Format { + Format::REG + } + + fn info(&self) -> &Spin { + &self.info + } + + fn super_block(&self) -> &SbRef { + &self.sb + } + + fn page_cache(&self) -> Option<&PageCache> { + Some(&self.pages) + } +} + +impl InodeDirOps for FileInode {} +impl InodeFileOps for FileInode { + async fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { + let _lock = self.rwsem.read().await; + self.pages.read(buffer, offset).await + } + + async fn write(&self, stream: &mut dyn Stream, offset: WriteOffset<'_>) -> KResult { + let _lock = self.rwsem.write().await; + + let mut store_new_end = None; + let offset = match offset { + WriteOffset::Position(offset) => offset, + WriteOffset::End(end) => { + store_new_end = Some(end); + + // `info.size` won't change since we are holding the write lock. + self.info.lock().size as usize + } + }; + + let wrote = self.pages.write(stream, offset).await?; + let cursor_end = offset + wrote; + + if let Some(store_end) = store_new_end { + *store_end = cursor_end; + } + + { + let now = Instant::now(); + let mut info = self.info.lock(); + info.mtime = now; + info.ctime = now; + info.size = info.size.max(cursor_end as u64); + } + + Ok(wrote) + } + + async fn truncate(&self, length: usize) -> KResult<()> { + let _lock = self.rwsem.write().await; + + self.pages.resize(length).await?; + + { + let now = Instant::now(); + let mut info = self.info.lock(); + info.mtime = now; + info.ctime = now; + info.size = length as u64; + } + + Ok(()) + } + + async fn chmod(&self, perm: Permission) -> KResult<()> { + let _sb = self.sb.get()?; + + { + let mut info = self.info.lock(); + + info.perm = perm; + info.ctime = Instant::now(); + } + + Ok(()) + } +} + +pub struct DeviceInode { + sb: SbRef, + ino: Ino, + info: Spin, + is_block: bool, + devid: DeviceId, +} + +impl DeviceInode { + pub fn new(ino: Ino, sb: SbRef, mode: Mode, devid: DeviceId) -> InodeUse { + let now = Instant::now(); + + InodeUse::new(Self { + sb, + ino, + info: Spin::new(InodeInfo { + size: 0, + nlink: 1, + uid: 0, + gid: 0, + perm: Permission::new(mode.non_format_bits()), + atime: now, + ctime: now, + mtime: now, + }), + is_block: mode.format() == Format::BLK, + devid, + }) + } +} + +impl InodeOps for DeviceInode { + type SuperBlock = TmpFs; + + fn ino(&self) -> Ino { + self.ino + } + + fn format(&self) -> Format { + if self.is_block { + Format::BLK + } else { + Format::CHR + } + } + + fn info(&self) -> &Spin { + &self.info + } + + fn super_block(&self) -> &SbRef { + &self.sb + } + + fn page_cache(&self) -> Option<&PageCache> { + None + } +} + +impl InodeDirOps for DeviceInode {} +impl InodeFileOps for DeviceInode { + async fn chmod(&self, perm: Permission) -> KResult<()> { + let _sb = self.sb.get()?; + + { + 
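+            // Take the info spinlock only inside this block so the guard is
+            // dropped before we return.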
let mut info = self.info.lock(); + + info.perm = perm; + info.ctime = Instant::now(); + } + + Ok(()) + } + + fn devid(&self) -> KResult { + Ok(self.devid) + } +} + +pub struct SymlinkInode { + sb: SbRef, + ino: Ino, + info: Spin, + target: Arc<[u8]>, +} + +impl SymlinkInode { + pub fn new(ino: Ino, sb: SbRef, target: Arc<[u8]>) -> InodeUse { + let now = Instant::now(); + + InodeUse::new(Self { + sb, + ino, + info: Spin::new(InodeInfo { + size: target.len() as _, + nlink: 1, + uid: 0, + gid: 0, + perm: Permission::new(0o777), + atime: now, + ctime: now, + mtime: now, + }), + target, + }) + } +} + +impl InodeDirOps for SymlinkInode {} +impl InodeOps for SymlinkInode { + type SuperBlock = TmpFs; + + fn ino(&self) -> Ino { + self.ino + } + + fn format(&self) -> Format { + Format::LNK + } + + fn info(&self) -> &Spin { + &self.info + } + + fn super_block(&self) -> &SbRef { + &self.sb + } + + fn page_cache(&self) -> Option<&PageCache> { + None + } +} + +impl InodeFileOps for SymlinkInode { + async fn readlink(&self, buffer: &mut dyn Buffer) -> KResult { + buffer + .fill(self.target.as_ref()) + .map(|result| result.allow_partial()) + } +} diff --git a/src/fs/tmpfs/mod.rs b/src/fs/tmpfs/mod.rs new file mode 100644 index 00000000..2bef67b6 --- /dev/null +++ b/src/fs/tmpfs/mod.rs @@ -0,0 +1,73 @@ +mod dir; +mod file; + +use crate::kernel::vfs::inode::{Ino, InodeUse}; +use crate::kernel::vfs::types::{DeviceId, Permission}; +use crate::kernel::vfs::{SbRef, SbUse, SuperBlock, SuperBlockInfo}; +use crate::{ + kernel::vfs::{ + dentry::Dentry, + mount::{register_filesystem, Mount, MountCreator}, + }, + prelude::*, +}; +use alloc::sync::Arc; +use async_trait::async_trait; +use core::sync::atomic::AtomicU64; +use core::sync::atomic::Ordering; +use dir::DirectoryInode; +use eonix_sync::Mutex; + +pub struct TmpFs { + next_ino: AtomicU64, + rename_lock: Mutex<()>, +} + +impl SuperBlock for TmpFs {} + +impl TmpFs { + fn assign_ino(&self) -> Ino { + Ino::new(self.next_ino.fetch_add(1, Ordering::Relaxed)) + } + + fn create() -> KResult<(SbUse, InodeUse)> { + let tmpfs = SbUse::new( + SuperBlockInfo { + io_blksize: 4096, + device_id: DeviceId::new(0, 2), + read_only: false, + }, + Self { + next_ino: AtomicU64::new(1), + rename_lock: Mutex::new(()), + }, + ); + + let root_dir = DirectoryInode::new( + tmpfs.backend.assign_ino(), + SbRef::from(&tmpfs), + Permission::new(0o755), + ); + + Ok((tmpfs, root_dir)) + } +} + +struct TmpFsMountCreator; + +#[async_trait] +impl MountCreator for TmpFsMountCreator { + async fn create_mount(&self, _source: &str, _flags: u64, mp: &Arc) -> KResult { + let (fs, root_inode) = TmpFs::create()?; + + Mount::new(mp, fs, root_inode) + } + + fn check_signature(&self, _: &[u8]) -> KResult { + Ok(true) + } +} + +pub fn init() { + register_filesystem("tmpfs", Arc::new(TmpFsMountCreator)).unwrap(); +} diff --git a/src/kernel/block.rs b/src/kernel/block.rs index 349e3656..3e4b65d1 100644 --- a/src/kernel/block.rs +++ b/src/kernel/block.rs @@ -3,7 +3,7 @@ mod mbr; use super::{ constants::ENOENT, mem::{paging::Page, AsMemoryBlock as _}, - vfs::DevId, + vfs::types::DeviceId, }; use crate::kernel::constants::{EEXIST, EINVAL}; use crate::{ @@ -14,13 +14,10 @@ use alloc::{ collections::btree_map::{BTreeMap, Entry}, sync::Arc, }; +use async_trait::async_trait; use core::cmp::Ordering; use mbr::MBRPartTable; -pub fn make_device(major: u32, minor: u32) -> DevId { - (major << 8) & 0xff00u32 | minor & 0xffu32 -} - pub struct Partition { pub lba_offset: u64, pub sector_count: u64, @@ -30,11 +27,12 @@ pub 
trait PartTable { fn partitions(&self) -> impl Iterator + use<'_, Self>; } +#[async_trait] pub trait BlockRequestQueue: Send + Sync { /// Maximum number of sectors that can be read in one request fn max_request_pages(&self) -> u64; - fn submit(&self, req: BlockDeviceRequest) -> KResult<()>; + async fn submit<'a>(&'a self, req: BlockDeviceRequest<'a>) -> KResult<()>; } enum BlockDeviceType { @@ -42,7 +40,7 @@ enum BlockDeviceType { queue: Arc, }, Partition { - disk_dev: DevId, + disk_dev: DeviceId, lba_offset: u64, queue: Arc, }, @@ -50,7 +48,7 @@ enum BlockDeviceType { pub struct BlockDevice { /// Unique device identifier, major and minor numbers - devid: DevId, + devid: DeviceId, /// Total size of the device in sectors (512 bytes each) sector_count: u64, @@ -77,11 +75,11 @@ impl Ord for BlockDevice { } } -static BLOCK_DEVICE_LIST: Spin>> = Spin::new(BTreeMap::new()); +static BLOCK_DEVICE_LIST: Spin>> = Spin::new(BTreeMap::new()); impl BlockDevice { pub fn register_disk( - devid: DevId, + devid: DeviceId, size: u64, queue: Arc, ) -> KResult> { @@ -97,13 +95,13 @@ impl BlockDevice { } } - pub fn get(devid: DevId) -> KResult> { + pub fn get(devid: DeviceId) -> KResult> { BLOCK_DEVICE_LIST.lock().get(&devid).cloned().ok_or(ENOENT) } } impl BlockDevice { - pub fn devid(&self) -> DevId { + pub fn devid(&self) -> DeviceId { self.devid } @@ -121,7 +119,7 @@ impl BlockDevice { }; let device = Arc::new(BlockDevice { - devid: make_device(self.devid >> 8, (self.devid & 0xff) + idx as u32 + 1), + devid: DeviceId::new(self.devid.major, self.devid.minor + idx as u16 + 1), sector_count: size, dev_type: BlockDeviceType::Partition { disk_dev: self.devid, @@ -159,7 +157,7 @@ impl BlockDevice { /// - `req.sector` must be within the disk size /// - `req.buffer` must be enough to hold the data /// - pub fn commit_request(&self, mut req: BlockDeviceRequest) -> KResult<()> { + pub async fn commit_request(&self, mut req: BlockDeviceRequest<'_>) -> KResult<()> { // Verify the request parameters. match &mut req { BlockDeviceRequest::Read { sector, count, .. } => { @@ -184,7 +182,7 @@ impl BlockDevice { } } - self.queue().submit(req) + self.queue().submit(req).await } /// Read some from the block device, may involve some copy and fragmentation @@ -194,7 +192,7 @@ impl BlockDevice { /// # Arguments /// `offset` - offset in bytes /// - pub fn read_some(&self, offset: usize, buffer: &mut dyn Buffer) -> KResult { + pub async fn read_some(&self, offset: usize, buffer: &mut dyn Buffer) -> KResult { let mut sector_start = offset as u64 / 512; let mut first_sector_offset = offset as u64 % 512; let mut sector_count = (first_sector_offset + buffer.total() as u64 + 511) / 512; @@ -241,7 +239,7 @@ impl BlockDevice { buffer: &pages, }; - self.commit_request(req)?; + self.commit_request(req).await?; for page in pages.iter() { // SAFETY: We are the only owner of the page so no one could be mutating it. 
@@ -277,7 +275,7 @@ impl BlockDevice { /// `offset` - offset in bytes /// `data` - data to write /// - pub fn write_some(&self, offset: usize, data: &[u8]) -> KResult { + pub async fn write_some(&self, offset: usize, data: &[u8]) -> KResult { let mut sector_start = offset as u64 / 512; let mut first_sector_offset = offset as u64 % 512; let mut remaining_data = data; @@ -320,7 +318,7 @@ impl BlockDevice { count: sector_count, buffer: pages, }; - self.commit_request(read_req)?; + self.commit_request(read_req).await?; } let mut data_offset = 0; @@ -356,7 +354,7 @@ impl BlockDevice { count: sector_count, buffer: pages, }; - self.commit_request(write_req)?; + self.commit_request(write_req).await?; let bytes_written = data_offset; nwritten += bytes_written; diff --git a/src/kernel/block/mbr.rs b/src/kernel/block/mbr.rs index 74cdc36e..c5820679 100644 --- a/src/kernel/block/mbr.rs +++ b/src/kernel/block/mbr.rs @@ -31,7 +31,7 @@ pub struct MBRPartTable { impl MBRPartTable { pub async fn from_disk(disk: &BlockDevice) -> KResult { let mut mbr: UninitBuffer = UninitBuffer::new(); - disk.read_some(0, &mut mbr)?.ok_or(EIO)?; + disk.read_some(0, &mut mbr).await?.ok_or(EIO)?; let mbr = mbr.assume_init()?; if mbr.magic != [0x55, 0xaa] { diff --git a/src/kernel/chardev.rs b/src/kernel/chardev.rs index aff3271e..4e01d83a 100644 --- a/src/kernel/chardev.rs +++ b/src/kernel/chardev.rs @@ -1,10 +1,9 @@ use super::{ - block::make_device, console::get_console, constants::{EEXIST, EIO}, task::{block_on, ProcessList, Thread}, terminal::Terminal, - vfs::{DevId, File, FileType, TerminalFile}, + vfs::{types::DeviceId, File, FileType, TerminalFile}, }; use crate::{ io::{Buffer, Stream, StreamRead}, @@ -34,7 +33,7 @@ pub struct CharDevice { device: CharDeviceType, } -static CHAR_DEVICES: Spin>> = Spin::new(BTreeMap::new()); +static CHAR_DEVICES: Spin>> = Spin::new(BTreeMap::new()); impl CharDevice { pub fn read(&self, buffer: &mut dyn Buffer) -> KResult { @@ -54,11 +53,11 @@ impl CharDevice { } } - pub fn get(devid: DevId) -> Option> { + pub fn get(devid: DeviceId) -> Option> { CHAR_DEVICES.lock().get(&devid).cloned() } - pub fn register(devid: DevId, name: Arc, device: CharDeviceType) -> KResult<()> { + pub fn register(devid: DeviceId, name: Arc, device: CharDeviceType) -> KResult<()> { match CHAR_DEVICES.lock().entry(devid) { Entry::Vacant(entry) => { entry.insert(Arc::new(CharDevice { name, device })); @@ -134,19 +133,19 @@ impl VirtualCharDevice for ConsoleDevice { impl CharDevice { pub fn init() -> KResult<()> { Self::register( - make_device(1, 3), + DeviceId::new(1, 3), Arc::from("null"), CharDeviceType::Virtual(Box::new(NullDevice)), )?; Self::register( - make_device(1, 5), + DeviceId::new(1, 5), Arc::from("zero"), CharDeviceType::Virtual(Box::new(ZeroDevice)), )?; Self::register( - make_device(5, 1), + DeviceId::new(5, 1), Arc::from("console"), CharDeviceType::Virtual(Box::new(ConsoleDevice)), )?; diff --git a/src/kernel/mem.rs b/src/kernel/mem.rs index efd06824..c147306e 100644 --- a/src/kernel/mem.rs +++ b/src/kernel/mem.rs @@ -12,5 +12,5 @@ pub use access::{AsMemoryBlock, MemoryBlock, PhysAccess}; pub(self) use mm_area::MMArea; pub use mm_list::{handle_kernel_page_fault, FileMapping, MMList, Mapping, Permission}; pub use page_alloc::{GlobalPageAlloc, RawPage}; -pub use page_cache::{CachePage, CachePageStream, PageCache, PageCacheBackend}; +pub use page_cache::{CachePage, CachePageStream, PageCache, PageCacheBackendOps}; pub use paging::{Page, PageBuffer}; diff --git a/src/kernel/mem/mm_list/mapping.rs 
b/src/kernel/mem/mm_list/mapping.rs
index 662000ba..5446ae42 100644
--- a/src/kernel/mem/mm_list/mapping.rs
+++ b/src/kernel/mem/mm_list/mapping.rs
@@ -1,24 +1,15 @@
-use core::fmt::Debug;
-
-use crate::kernel::vfs::inode::Inode;
-use alloc::sync::Arc;
+use crate::kernel::vfs::inode::{Inode, InodeUse};
 use eonix_mm::paging::PAGE_SIZE;
 
 #[derive(Debug, Clone)]
 pub struct FileMapping {
-    pub file: Arc<dyn Inode>,
+    pub file: InodeUse,
     /// Offset in the file, aligned to 4KB boundary.
     pub offset: usize,
     /// Length of the mapping. Exceeding part will be zeroed.
     pub length: usize,
 }
 
-impl Debug for dyn Inode {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        write!(f, "Inode()")
-    }
-}
-
 #[derive(Debug, Clone)]
 pub enum Mapping {
     // private anonymous memory
@@ -28,7 +19,7 @@ pub enum Mapping {
 }
 
 impl FileMapping {
-    pub fn new(file: Arc<dyn Inode>, offset: usize, length: usize) -> Self {
+    pub fn new(file: InodeUse, offset: usize, length: usize) -> Self {
         assert_eq!(offset & (PAGE_SIZE - 1), 0);
         Self {
             file,
diff --git a/src/kernel/mem/page_cache.rs b/src/kernel/mem/page_cache.rs
index 3ccf3255..9deb50cf 100644
--- a/src/kernel/mem/page_cache.rs
+++ b/src/kernel/mem/page_cache.rs
@@ -6,8 +6,10 @@ use crate::{
     GlobalPageAlloc,
 };
 use align_ext::AlignExt;
+use alloc::boxed::Box;
 use alloc::{collections::btree_map::BTreeMap, sync::Weak};
-use core::mem::ManuallyDrop;
+use async_trait::async_trait;
+use core::{future::Future, mem::ManuallyDrop};
 use eonix_hal::mm::ArchPhysAccess;
 use eonix_mm::{
     address::{PAddr, PhysAccess},
@@ -159,7 +161,8 @@ impl PageCache {
             self.backend
                 .upgrade()
                 .unwrap()
-                .read_page(&mut new_page, offset.align_down(PAGE_SIZE))?;
+                .read_page(&mut new_page, offset.align_down(PAGE_SIZE))
+                .await?;
             pages.insert(page_id, new_page);
         }
     }
@@ -205,7 +208,8 @@
             self.backend
                 .upgrade()
                 .unwrap()
-                .read_page(&mut new_page, offset.align_down(PAGE_SIZE))?;
+                .read_page(&mut new_page, offset.align_down(PAGE_SIZE))
+                .await?;
             new_page
         };
@@ -224,7 +228,8 @@
             self.backend
                 .upgrade()
                 .unwrap()
-                .write_page(&mut CachePageStream::new(*page), page_id << PAGE_SIZE_BITS)?;
+                .write_page(&mut CachePageStream::new(*page), page_id << PAGE_SIZE_BITS)
+                .await?;
             page.clear_dirty();
         }
     }
@@ -286,7 +291,8 @@
             self.backend
                 .upgrade()
                 .unwrap()
-                .read_page(&mut new_page, offset_aligin)?;
+                .read_page(&mut new_page, offset_aligin)
+                .await?;
             pages.insert(page_id, new_page);
             new_page.0
         }
@@ -349,14 +355,47 @@
 // for fs, offset is file offset (floor algin to PAGE_SIZE)
 // for blkdev, offset is block idx (floor align to PAGE_SIZE / BLK_SIZE)
 // Oh no, this would make unnecessary cache
-pub trait PageCacheBackend {
-    fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult<usize>;
+pub trait PageCacheBackendOps: Sized {
+    fn read_page(
+        &self,
+        page: &mut CachePage,
+        offset: usize,
+    ) -> impl Future<Output = KResult<usize>> + Send;
+
+    fn write_page(
+        &self,
+        page: &mut CachePageStream,
+        offset: usize,
+    ) -> impl Future<Output = KResult<usize>> + Send;
 
-    fn write_page(&self, page: &mut CachePageStream, offset: usize) -> KResult<usize>;
+    fn size(&self) -> usize;
+}
 
+#[async_trait]
+pub trait PageCacheBackend: Send + Sync {
+    async fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult<usize>;
+    async fn write_page(&self, page: &mut CachePageStream, offset: usize) -> KResult<usize>;
     fn size(&self) -> usize;
 }
 
+#[async_trait]
+impl<T> PageCacheBackend for T
+where
+    T: PageCacheBackendOps + Send + Sync + 'static,
+{
+    async fn read_page(&self, page: &mut CachePage,
offset: usize) -> KResult { + self.read_page(page, offset).await + } + + async fn write_page(&self, page: &mut CachePageStream, offset: usize) -> KResult { + self.write_page(page, offset).await + } + + fn size(&self) -> usize { + self.size() + } +} + pub trait PageCacheRawPage: RawPage { fn valid_size(&self) -> &mut usize; diff --git a/src/kernel/pcie/driver.rs b/src/kernel/pcie/driver.rs index be88b7df..eebaa896 100644 --- a/src/kernel/pcie/driver.rs +++ b/src/kernel/pcie/driver.rs @@ -4,21 +4,24 @@ use super::{ }; use crate::{kernel::constants::EEXIST, KResult}; use alloc::{ + boxed::Box, collections::btree_map::{self, BTreeMap}, sync::Arc, }; +use async_trait::async_trait; use eonix_sync::Spin; static PCIE_DRIVERS: Spin>> = Spin::new(BTreeMap::new()); +#[async_trait] pub trait PCIDriver: Send + Sync { fn vendor_id(&self) -> u16; fn device_id(&self) -> u16; - fn handle_device(&self, device: Arc>) -> Result<(), PciError>; + async fn handle_device(&self, device: Arc>) -> Result<(), PciError>; } -pub fn register_driver(driver: impl PCIDriver + 'static) -> KResult<()> { +pub async fn register_driver(driver: impl PCIDriver + 'static) -> KResult<()> { let index = (driver.vendor_id() as u32) << 16 | driver.device_id() as u32; let driver = Arc::new(driver); @@ -31,7 +34,7 @@ pub fn register_driver(driver: impl PCIDriver + 'static) -> KResult<()> { let devices = PCIE_DEVICES.lock().get(&index).cloned(); if let Some(devices) = devices { for device in devices { - driver.handle_device(device)?; + driver.handle_device(device).await?; } }; diff --git a/src/kernel/syscall/file_rw.rs b/src/kernel/syscall/file_rw.rs index 1a48b255..db32b0e5 100644 --- a/src/kernel/syscall/file_rw.rs +++ b/src/kernel/syscall/file_rw.rs @@ -7,7 +7,7 @@ use crate::kernel::syscall::UserMut; use crate::kernel::task::Thread; use crate::kernel::timer::sleep; use crate::kernel::vfs::filearray::FD; -use crate::kernel::vfs::inode::Mode; +use crate::kernel::vfs::types::{DeviceId, Mode}; use crate::kernel::vfs::{PollEvent, SeekOption}; use crate::{ io::{Buffer, BufferFill}, @@ -41,7 +41,7 @@ impl FromSyscallArg for AtFlags { } } -fn dentry_from( +async fn dentry_from( thread: &Thread, dirfd: FD, pathname: User, @@ -52,7 +52,7 @@ fn dentry_from( match (path.as_cstr().to_bytes_with_nul()[0], dirfd) { (b'/', _) | (_, FD::AT_FDCWD) => { let path = Path::new(path.as_cstr().to_bytes())?; - Dentry::open(&thread.fs_context, path, follow_symlink) + Dentry::open(&thread.fs_context, path, follow_symlink).await } (0, dirfd) => { let dir_file = thread.files.get(dirfd).ok_or(EBADF)?; @@ -63,7 +63,7 @@ fn dentry_from( let dir_file = thread.files.get(dirfd).ok_or(EBADF)?; let dir_dentry = dir_file.as_path().ok_or(ENOTDIR)?; - Dentry::open_at(&thread.fs_context, dir_dentry, path, follow_symlink) + Dentry::open_at(&thread.fs_context, dir_dentry, path, follow_symlink).await } } } @@ -119,13 +119,11 @@ async fn pwrite64(fd: FD, buffer: User, count: usize, offset: usize) -> KRes } #[eonix_macros::define_syscall(SYS_OPENAT)] -async fn openat(dirfd: FD, pathname: User, flags: OpenFlags, mut mode: Mode) -> KResult { - let dentry = dentry_from(thread, dirfd, pathname, flags.follow_symlink())?; +async fn openat(dirfd: FD, pathname: User, flags: OpenFlags, mode: Mode) -> KResult { + let dentry = dentry_from(thread, dirfd, pathname, flags.follow_symlink()).await?; + let perm = mode.perm().mask_with(*thread.fs_context.umask.lock()); - let umask = *thread.fs_context.umask.lock(); - mode.mask_perm(!umask.non_format_bits()); - - thread.files.open(&dentry, flags, 
mode) + thread.files.open(&dentry, flags, perm).await } #[cfg(target_arch = "x86_64")] @@ -206,7 +204,7 @@ async fn newfstatat( let file = thread.files.get(dirfd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.clone() } else { - dentry_from(thread, dirfd, pathname, !flags.no_follow())? + dentry_from(thread, dirfd, pathname, !flags.no_follow()).await? }; let statbuf = UserPointerMut::new(statbuf)?; @@ -247,7 +245,7 @@ async fn statx( let file = thread.files.get(dirfd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.clone() } else { - dentry_from(thread, dirfd, pathname, !flags.no_follow())? + dentry_from(thread, dirfd, pathname, !flags.no_follow()).await? }; dentry.statx(&mut statx, mask)?; @@ -257,12 +255,11 @@ async fn statx( } #[eonix_macros::define_syscall(SYS_MKDIRAT)] -async fn mkdirat(dirfd: FD, pathname: User, mut mode: Mode) -> KResult<()> { - let umask = *thread.fs_context.umask.lock(); - mode.mask_perm(!umask.non_format_bits()); +async fn mkdirat(dirfd: FD, pathname: User, mode: Mode) -> KResult<()> { + let dentry = dentry_from(thread, dirfd, pathname, true).await?; + let perm = mode.perm().mask_with(*thread.fs_context.umask.lock()); - let dentry = dentry_from(thread, dirfd, pathname, true)?; - dentry.mkdir(mode) + dentry.mkdir(perm).await } #[cfg(target_arch = "x86_64")] @@ -274,7 +271,7 @@ async fn mkdir(pathname: User, mode: u32) -> KResult<()> { #[eonix_macros::define_syscall(SYS_FTRUNCATE64)] async fn truncate64(fd: FD, length: usize) -> KResult<()> { let file = thread.files.get(fd).ok_or(EBADF)?; - file.as_path().ok_or(EBADF)?.truncate(length) + file.as_path().ok_or(EBADF)?.truncate(length).await } #[cfg(target_arch = "x86_64")] @@ -290,7 +287,10 @@ async fn truncate(pathname: User, length: usize) -> KResult<()> { #[eonix_macros::define_syscall(SYS_UNLINKAT)] async fn unlinkat(dirfd: FD, pathname: User) -> KResult<()> { - dentry_from(thread, dirfd, pathname, false)?.unlink() + dentry_from(thread, dirfd, pathname, false) + .await? + .unlink() + .await } #[cfg(target_arch = "x86_64")] @@ -302,9 +302,9 @@ async fn unlink(pathname: User) -> KResult<()> { #[eonix_macros::define_syscall(SYS_SYMLINKAT)] async fn symlinkat(target: User, dirfd: FD, linkpath: User) -> KResult<()> { let target = UserString::new(target)?; - let dentry = dentry_from(thread, dirfd, linkpath, false)?; + let dentry = dentry_from(thread, dirfd, linkpath, false).await?; - dentry.symlink(target.as_cstr().to_bytes()) + dentry.symlink(target.as_cstr().to_bytes()).await } #[cfg(target_arch = "x86_64")] @@ -313,18 +313,36 @@ async fn symlink(target: User, linkpath: User) -> KResult<()> { sys_symlinkat(thread, target, FD::AT_FDCWD, linkpath) } +#[derive(Clone, Copy, Debug)] +#[repr(transparent)] +struct UserDeviceId(u32); + +impl FromSyscallArg for UserDeviceId { + fn from_arg(value: usize) -> Self { + Self(value as u32) + } +} + +impl UserDeviceId { + pub fn into_devid(self) -> DeviceId { + let major = (self.0 >> 8) & 0xfff; + let minor = (self.0 & 0xff) | ((self.0 >> 12) & 0xfff00); + + // TODO: We strip off the high 4 bits of the minor ID for now... 
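+        // A worked example of this (glibc-style) dev_t layout: the minor
+        // number occupies bits 0-7 and 20-31 and the major number bits 8-19,
+        // so a raw value of 0x00000801 decodes to major 8, minor 1. The
+        // `as u16` casts below keep only the low 16 of the 20 decoded minor
+        // bits, which is the truncation the note above refers to.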
+ DeviceId::new(major as u16, minor as u16) + } +} + #[eonix_macros::define_syscall(SYS_MKNODAT)] -async fn mknodat(dirfd: FD, pathname: User, mut mode: Mode, dev: u32) -> KResult<()> { +async fn mknodat(dirfd: FD, pathname: User, mut mode: Mode, dev: UserDeviceId) -> KResult<()> { if !mode.is_blk() && !mode.is_chr() { return Err(EINVAL); } - let dentry = dentry_from(thread, dirfd, pathname, true)?; - - let umask = *thread.fs_context.umask.lock(); - mode.mask_perm(!umask.non_format_bits()); + let dentry = dentry_from(thread, dirfd, pathname, true).await?; + mode.set_perm(mode.perm().mask_with(*thread.fs_context.umask.lock())); - dentry.mknod(mode, dev) + dentry.mknod(mode, dev.into_devid()).await } #[cfg(target_arch = "x86_64")] @@ -340,10 +358,10 @@ async fn readlinkat( buffer: UserMut, bufsize: usize, ) -> KResult { - let dentry = dentry_from(thread, dirfd, pathname, false)?; + let dentry = dentry_from(thread, dirfd, pathname, false).await?; let mut buffer = UserBuffer::new(buffer, bufsize)?; - dentry.readlink(&mut buffer) + dentry.readlink(&mut buffer).await } #[cfg(target_arch = "x86_64")] @@ -471,7 +489,7 @@ async fn faccessat(dirfd: FD, pathname: User, _mode: u32, flags: AtFlags) -> let file = thread.files.get(dirfd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.clone() } else { - dentry_from(thread, dirfd, pathname, !flags.no_follow())? + dentry_from(thread, dirfd, pathname, !flags.no_follow()).await? }; if !dentry.is_valid() { @@ -614,12 +632,12 @@ async fn fchownat( gid: u32, flags: AtFlags, ) -> KResult<()> { - let dentry = dentry_from(thread, dirfd, pathname, !flags.no_follow())?; + let dentry = dentry_from(thread, dirfd, pathname, !flags.no_follow()).await?; if !dentry.is_valid() { return Err(ENOENT); } - dentry.chown(uid, gid) + dentry.chown(uid, gid).await } #[eonix_macros::define_syscall(SYS_FCHMODAT)] @@ -628,14 +646,14 @@ async fn fchmodat(dirfd: FD, pathname: User, mode: Mode, flags: AtFlags) -> let file = thread.files.get(dirfd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.clone() } else { - dentry_from(thread, dirfd, pathname, !flags.no_follow())? + dentry_from(thread, dirfd, pathname, !flags.no_follow()).await? }; if !dentry.is_valid() { return Err(ENOENT); } - dentry.chmod(mode) + dentry.chmod(mode).await } #[eonix_macros::define_syscall(SYS_FCHMOD)] @@ -654,7 +672,7 @@ async fn utimensat( let file = thread.files.get(dirfd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.clone() } else { - dentry_from(thread, dirfd, pathname, !flags.no_follow())? + dentry_from(thread, dirfd, pathname, !flags.no_follow()).await? 
}; if !dentry.is_valid() { @@ -688,10 +706,10 @@ async fn renameat2( Err(EINVAL)?; } - let old_dentry = dentry_from(thread, old_dirfd, old_pathname, false)?; - let new_dentry = dentry_from(thread, new_dirfd, new_pathname, false)?; + let old_dentry = dentry_from(thread, old_dirfd, old_pathname, false).await?; + let new_dentry = dentry_from(thread, new_dirfd, new_pathname, false).await?; - old_dentry.rename(&new_dentry, flags) + old_dentry.rename(&new_dentry, flags).await } #[cfg(target_arch = "x86_64")] diff --git a/src/kernel/syscall/mm.rs b/src/kernel/syscall/mm.rs index c6300ac7..4cb7908c 100644 --- a/src/kernel/syscall/mm.rs +++ b/src/kernel/syscall/mm.rs @@ -1,10 +1,8 @@ use super::FromSyscallArg; -use crate::fs::shm::{gen_shm_id, ShmFlags, IPC_PRIVATE, SHM_MANAGER}; -use crate::kernel::constants::{EBADF, EEXIST, EINVAL, ENOENT}; +use crate::kernel::constants::{EBADF, EINVAL}; use crate::kernel::mem::FileMapping; use crate::kernel::task::Thread; use crate::kernel::vfs::filearray::FD; -use crate::kernel::vfs::inode::Mode; use crate::{ kernel::{ constants::{UserMmapFlags, UserMmapProtocol}, @@ -66,13 +64,7 @@ async fn do_mmap2( if !is_shared { Mapping::Anonymous } else { - // The mode is unimportant here, since we are checking prot in mm_area. - let shared_area = SHM_MANAGER.lock().await.create_shared_area( - len, - thread.process.pid, - Mode::REG.perm(0o777), - ); - Mapping::File(FileMapping::new(shared_area.area.clone(), 0, len)) + unimplemented!("mmap MAP_ANONYMOUS | MAP_SHARED"); } } else { let file = thread @@ -179,114 +171,4 @@ async fn mprotect(addr: usize, len: usize, prot: UserMmapProtocol) -> KResult<() .await } -#[eonix_macros::define_syscall(SYS_SHMGET)] -async fn shmget(key: usize, size: usize, shmflg: u32) -> KResult { - let size = size.align_up(PAGE_SIZE); - - let mut shm_manager = SHM_MANAGER.lock().await; - let shmid = gen_shm_id(key)?; - - let mode = Mode::REG.perm(shmflg); - let shmflg = ShmFlags::from_bits_truncate(shmflg); - - if key == IPC_PRIVATE { - let new_shm = shm_manager.create_shared_area(size, thread.process.pid, mode); - shm_manager.insert(shmid, new_shm); - return Ok(shmid); - } - - if let Some(_) = shm_manager.get(shmid) { - if shmflg.contains(ShmFlags::IPC_CREAT | ShmFlags::IPC_EXCL) { - return Err(EEXIST); - } - - return Ok(shmid); - } - - if shmflg.contains(ShmFlags::IPC_CREAT) { - let new_shm = shm_manager.create_shared_area(size, thread.process.pid, mode); - shm_manager.insert(shmid, new_shm); - return Ok(shmid); - } - - Err(ENOENT) -} - -#[eonix_macros::define_syscall(SYS_SHMAT)] -async fn shmat(shmid: u32, addr: usize, shmflg: u32) -> KResult { - let mm_list = &thread.process.mm_list; - let shm_manager = SHM_MANAGER.lock().await; - let shm_area = shm_manager.get(shmid).ok_or(EINVAL)?; - - // Why is this not used? 
- let _mode = shmflg & 0o777; - let shmflg = ShmFlags::from_bits_truncate(shmflg); - - let mut permission = Permission { - read: true, - write: true, - execute: false, - }; - - if shmflg.contains(ShmFlags::SHM_EXEC) { - permission.execute = true; - } - if shmflg.contains(ShmFlags::SHM_RDONLY) { - permission.write = false; - } - - let size = shm_area.shmid_ds.shm_segsz; - - let mapping = Mapping::File(FileMapping { - file: shm_area.area.clone(), - offset: 0, - length: size, - }); - - let addr = if addr != 0 { - if addr % PAGE_SIZE != 0 && !shmflg.contains(ShmFlags::SHM_RND) { - return Err(EINVAL); - } - let addr = VAddr::from(addr.align_down(PAGE_SIZE)); - mm_list - .mmap_fixed(addr, size, mapping, permission, true) - .await - } else { - mm_list - .mmap_hint(VAddr::NULL, size, mapping, permission, true) - .await - }?; - - thread.process.shm_areas.lock().insert(addr, size); - - Ok(addr.addr()) -} - -#[eonix_macros::define_syscall(SYS_SHMDT)] -async fn shmdt(addr: usize) -> KResult<()> { - let addr = VAddr::from(addr); - - let size = { - let mut shm_areas = thread.process.shm_areas.lock(); - let size = *shm_areas.get(&addr).ok_or(EINVAL)?; - shm_areas.remove(&addr); - - size - }; - - thread.process.mm_list.unmap(addr, size).await -} - -#[eonix_macros::define_syscall(SYS_SHMCTL)] -async fn shmctl(_shmid: u32, _op: i32, _shmid_ds: usize) -> KResult { - // TODO - Ok(0) -} - -#[eonix_macros::define_syscall(SYS_MEMBARRIER)] -async fn membarrier(_cmd: usize, _flags: usize) -> KResult<()> { - // TODO - Ok(()) -} - pub fn keep_alive() {} diff --git a/src/kernel/syscall/procops.rs b/src/kernel/syscall/procops.rs index 7dd573cc..b4d3e449 100644 --- a/src/kernel/syscall/procops.rs +++ b/src/kernel/syscall/procops.rs @@ -16,7 +16,7 @@ use crate::kernel::task::{parse_futexop, CloneArgs}; use crate::kernel::timer::sleep; use crate::kernel::user::UserString; use crate::kernel::user::{UserPointer, UserPointerMut}; -use crate::kernel::vfs::inode::Mode; +use crate::kernel::vfs::types::Permission; use crate::kernel::vfs::{self, dentry::Dentry}; use crate::path::Path; use crate::{kernel::user::UserBuffer, prelude::*}; @@ -100,10 +100,11 @@ async fn clock_nanosleep( } #[eonix_macros::define_syscall(SYS_UMASK)] -async fn umask(mask: Mode) -> KResult { - let mut umask = thread.fs_context.umask.lock(); +async fn umask(raw_new_mask: u32) -> KResult { + let new_mask = Permission::new(!raw_new_mask); + let old_mask = core::mem::replace(&mut *thread.fs_context.umask.lock(), new_mask); - Ok(core::mem::replace(&mut *umask, mask.non_format())) + Ok(!old_mask.bits()) } #[eonix_macros::define_syscall(SYS_GETCWD)] @@ -124,7 +125,7 @@ async fn chdir(path: User) -> KResult<()> { let path = UserString::new(path)?; let path = Path::new(path.as_cstr().to_bytes())?; - let dentry = Dentry::open(&thread.fs_context, path, true)?; + let dentry = Dentry::open(&thread.fs_context, path, true).await?; if !dentry.is_valid() { return Err(ENOENT); } @@ -159,7 +160,8 @@ async fn mount(source: User, target: User, fstype: User, flags: usiz &thread.fs_context, Path::new(target.as_cstr().to_bytes())?, true, - )?; + ) + .await?; if !mountpoint.is_valid() { return Err(ENOENT); @@ -172,6 +174,7 @@ async fn mount(source: User, target: User, fstype: User, flags: usiz fstype.as_cstr().to_str().map_err(|_| EINVAL)?, flags as u64, ) + .await } fn get_strings(mut ptr_strings: UserPointer<'_, PtrT>) -> KResult> { @@ -199,14 +202,15 @@ async fn execve(exec: User, argv: User, envp: User) -> KResult Elf { Err(ENOEXEC) } - fn parse(elf_file: Arc) -> KResult { + 
async fn parse(elf_file: Arc) -> KResult { let mut elf_header = UninitBuffer::>::new(); - elf_file.read(&mut elf_header, 0)?; + elf_file.read(&mut elf_header, 0).await?; let elf_header = elf_header.assume_init().map_err(|_| ENOEXEC)?; @@ -203,10 +203,12 @@ impl Elf { let ph_count = elf_header.pt2.ph_count; let mut program_headers = vec![E::Ph::default(); ph_count as usize]; - elf_file.read( - &mut ByteBuffer::from(program_headers.as_mut_slice()), - ph_offset.into_usize(), - )?; + elf_file + .read( + &mut ByteBuffer::from(program_headers.as_mut_slice()), + ph_offset.into_usize(), + ) + .await?; Ok(Self { file: elf_file, @@ -390,12 +392,13 @@ impl Elf { } async fn load_ldso(&self, mm_list: &MMList) -> KResult> { - let ldso_path = self.ldso_path()?; + let ldso_path = self.ldso_path().await?; if let Some(ldso_path) = ldso_path { let fs_context = FsContext::global(); - let ldso_file = Dentry::open(fs_context, Path::new(ldso_path.as_bytes())?, true)?; - let ldso_elf = Elf::::parse(ldso_file)?; + let ldso_file = + Dentry::open(fs_context, Path::new(ldso_path.as_bytes())?, true).await?; + let ldso_elf = Elf::::parse(ldso_file).await?; let base = VAddr::from(E::LDSO_BASE_ADDR); @@ -420,7 +423,7 @@ impl Elf { mm_list.map_vdso().await } - fn ldso_path(&self) -> KResult> { + async fn ldso_path(&self) -> KResult> { for program_header in &self.program_headers { let type_ = program_header.type_().map_err(|_| ENOEXEC)?; @@ -430,7 +433,8 @@ impl Elf { let mut ldso_vec = vec![0u8; file_size - 1]; // -1 due to '\0' self.file - .read(&mut ByteBuffer::from(ldso_vec.as_mut_slice()), file_offset)?; + .read(&mut ByteBuffer::from(ldso_vec.as_mut_slice()), file_offset) + .await?; let ldso_path = String::from_utf8(ldso_vec).map_err(|_| ENOEXEC)?; return Ok(Some(ldso_path)); } @@ -445,16 +449,16 @@ pub enum ELF { } impl ELF { - pub fn parse(elf_file: Arc) -> KResult { + pub async fn parse(elf_file: Arc) -> KResult { let mut header_pt1 = UninitBuffer::::new(); - elf_file.read(&mut header_pt1, 0)?; + elf_file.read(&mut header_pt1, 0).await?; let header_pt1 = header_pt1.assume_init().map_err(|_| ENOEXEC)?; assert_eq!(header_pt1.magic, ELF_MAGIC); match header_pt1.class() { - Class::ThirtyTwo => Ok(ELF::Elf32(Elf::parse(elf_file)?)), - Class::SixtyFour => Ok(ELF::Elf64(Elf::parse(elf_file)?)), + Class::ThirtyTwo => Ok(ELF::Elf32(Elf::parse(elf_file).await?)), + Class::SixtyFour => Ok(ELF::Elf64(Elf::parse(elf_file).await?)), _ => Err(ENOEXEC), } } diff --git a/src/kernel/task/loader/mod.rs b/src/kernel/task/loader/mod.rs index 4e3f4db1..7679aaf4 100644 --- a/src/kernel/task/loader/mod.rs +++ b/src/kernel/task/loader/mod.rs @@ -33,7 +33,7 @@ pub struct ProgramLoader { } impl ProgramLoader { - pub fn parse( + pub async fn parse( fs_context: &FsContext, mut exec_path: CString, mut file: Arc, @@ -49,12 +49,15 @@ impl ProgramLoader { } let mut magic = [0; 4]; - file.read(&mut ByteBuffer::new(magic.as_mut_slice()), 0)?; + file.read(&mut ByteBuffer::new(magic.as_mut_slice()), 0) + .await?; match magic { [b'#', b'!', ..] 
=> { let mut interpreter_line = [0; 256]; - let nread = file.read(&mut ByteBuffer::new(&mut interpreter_line), 0)?; + let nread = file + .read(&mut ByteBuffer::new(&mut interpreter_line), 0) + .await?; // There is a tiny time gap between reading the magic number and // reading the interpreter line, so we need to check if the line @@ -77,7 +80,7 @@ impl ProgramLoader { } let path = Path::new(interpreter_name.as_bytes())?; - file = Dentry::open(fs_context, path, true)?; + file = Dentry::open(fs_context, path, true).await?; args.insert(0, interpreter_name.clone()); if let Some(arg) = interpreter_arg { @@ -92,7 +95,7 @@ impl ProgramLoader { exec_path = interpreter_name; } - ELF_MAGIC => break ELF::parse(file)?, + ELF_MAGIC => break ELF::parse(file).await?, _ => return Err(ENOEXEC), } diff --git a/src/kernel/timer.rs b/src/kernel/timer.rs index 9b6a3ff2..1dbb1382 100644 --- a/src/kernel/timer.rs +++ b/src/kernel/timer.rs @@ -76,6 +76,8 @@ impl Ticks { } impl Instant { + pub const UNIX_EPOCH: Self = Self::default(); + pub const fn default() -> Self { Instant { secs_since_epoch: 0, diff --git a/src/kernel/vfs/dentry.rs b/src/kernel/vfs/dentry.rs index 8bcd9f8a..5ac4e407 100644 --- a/src/kernel/vfs/dentry.rs +++ b/src/kernel/vfs/dentry.rs @@ -1,8 +1,9 @@ pub mod dcache; use super::{ - inode::{Ino, Inode, Mode, RenameData, WriteOffset}, - DevId, FsContext, + inode::{Ino, Inode, InodeUse, RenameData, WriteOffset}, + types::{DeviceId, Format, Mode, Permission}, + FsContext, }; use crate::{ hash::KernelHasher, @@ -14,22 +15,31 @@ use crate::{ }; use crate::{ io::Stream, - kernel::constants::{EEXIST, EINVAL, EIO, EISDIR, ELOOP, ENOENT, ENOTDIR, EPERM, ERANGE}, + kernel::constants::{EEXIST, EINVAL, EISDIR, ELOOP, ENOENT, ENOTDIR, EPERM, ERANGE}, }; -use alloc::sync::{Arc, Weak}; +use alloc::sync::Arc; use core::{ fmt, + future::Future, hash::{BuildHasher, BuildHasherDefault, Hasher}, - ops::ControlFlow, + pin::Pin, sync::atomic::{AtomicPtr, AtomicU64, Ordering}, }; use eonix_sync::LazyLock; use pointers::BorrowedArc; use posix_types::{namei::RenameFlags, open::OpenFlags, result::PosixError, stat::StatX}; +#[derive(PartialEq, Eq)] +enum DentryKind { + Regular, + Directory, + Symlink, + Mountpoint, +} + struct DentryData { - inode: Arc, - flags: u64, + inode: InodeUse, + kind: DentryKind, } /// # Safety @@ -79,12 +89,6 @@ impl fmt::Debug for Dentry { } } -const D_DIRECTORY: u64 = 1; -#[allow(dead_code)] -const D_MOUNTPOINT: u64 = 2; -const D_SYMLINK: u64 = 4; -const D_REGULAR: u64 = 8; - impl RCUNode for Dentry { fn rcu_prev(&self) -> &AtomicPtr { &self.prev @@ -116,11 +120,11 @@ impl Dentry { self.hash.store(hash, Ordering::Relaxed); } - fn find(self: &Arc, name: &[u8]) -> KResult> { + async fn find(self: &Arc, name: &[u8]) -> KResult> { let data = self.data.load(); let data = data.as_ref().ok_or(ENOENT)?; - if data.flags & D_DIRECTORY == 0 { + if data.kind != DentryKind::Directory { return Err(ENOTDIR); } @@ -141,7 +145,7 @@ impl Dentry { return Ok(found); } - dcache::d_try_revalidate(&dentry); + let _ = dcache::d_try_revalidate(&dentry).await; dcache::d_add(dentry.clone()); Ok(dentry) @@ -192,8 +196,8 @@ impl Dentry { .map_or(core::ptr::null(), |parent| Arc::as_ptr(&parent)) } - fn save_data(&self, inode: Arc, flags: u64) -> KResult<()> { - let new = DentryData { inode, flags }; + fn save(&self, inode: InodeUse, kind: DentryKind) { + let new = DentryData { inode, kind }; // TODO!!!: We don't actually need to use `RCUPointer` here // Safety: this function may only be called from `create`-like functions 
which requires the @@ -201,41 +205,35 @@ impl Dentry { // can't get a reference to the old data. let old = unsafe { self.data.swap(Some(Arc::new(new))) }; assert!(old.is_none()); - - Ok(()) } - pub fn save_reg(&self, file: Arc) -> KResult<()> { - self.save_data(file, D_REGULAR) - } - - pub fn save_symlink(&self, link: Arc) -> KResult<()> { - self.save_data(link, D_SYMLINK) + pub fn fill(&self, file: InodeUse) { + match file.format() { + Format::REG | Format::BLK | Format::CHR => self.save(file, DentryKind::Regular), + Format::DIR => self.save(file, DentryKind::Directory), + Format::LNK => self.save(file, DentryKind::Symlink), + } } - pub fn save_dir(&self, dir: Arc) -> KResult<()> { - self.save_data(dir, D_DIRECTORY) + pub fn inode(&self) -> Option> { + self.data.load().as_ref().map(|data| data.inode.clone()) } - pub fn get_inode(&self) -> KResult> { - self.data - .load() - .as_ref() - .ok_or(ENOENT) - .map(|data| data.inode.clone()) + pub fn get_inode(&self) -> KResult> { + self.inode().ok_or(ENOENT) } pub fn is_directory(&self) -> bool { let data = self.data.load(); data.as_ref() - .map_or(false, |data| data.flags & D_DIRECTORY != 0) + .map_or(false, |data| data.kind == DentryKind::Directory) } pub fn is_valid(&self) -> bool { self.data.load().is_some() } - pub fn open_check(self: &Arc, flags: OpenFlags, mode: Mode) -> KResult<()> { + pub async fn open_check(self: &Arc, flags: OpenFlags, perm: Permission) -> KResult<()> { let data = self.data.load(); if data.is_some() { @@ -250,7 +248,7 @@ impl Dentry { } let parent = self.parent().get_inode()?; - parent.creat(self, mode) + parent.create(self, perm).await } } } @@ -260,110 +258,120 @@ impl Dentry { context: &FsContext, dentry: Arc, nrecur: u32, - ) -> KResult> { - if nrecur >= 16 { - return Err(ELOOP); - } + ) -> Pin>> + use<'_>>> { + Box::pin(async move { + if nrecur >= 16 { + return Err(ELOOP); + } - let data = dentry.data.load(); - let data = data.as_ref().ok_or(ENOENT)?; + let data = dentry.data.load(); + let data = data.as_ref().ok_or(ENOENT)?; - match data.flags { - flags if flags & D_REGULAR != 0 => Err(ENOTDIR), - flags if flags & D_DIRECTORY != 0 => Ok(dentry), - flags if flags & D_SYMLINK != 0 => { - let mut buffer = [0u8; 256]; - let mut buffer = ByteBuffer::new(&mut buffer); + match data.kind { + DentryKind::Regular => Err(ENOTDIR), + DentryKind::Directory => Ok(dentry), + DentryKind::Symlink => { + let mut buffer = [0u8; 256]; + let mut buffer = ByteBuffer::new(&mut buffer); - data.inode.readlink(&mut buffer)?; - let path = Path::new(buffer.data())?; + data.inode.readlink(&mut buffer).await?; + let path = Path::new(buffer.data())?; - let dentry = - Self::open_recursive(context, &dentry.parent(), path, true, nrecur + 1)?; + let dentry = + Self::open_recursive(context, &dentry.parent(), path, true, nrecur + 1) + .await?; - Self::resolve_directory(context, dentry, nrecur + 1) + Self::resolve_directory(context, dentry, nrecur + 1).await + } + _ => panic!("Invalid dentry flags"), } - _ => panic!("Invalid dentry flags"), - } + }) } - pub fn open_recursive( - context: &FsContext, - cwd: &Arc, - path: Path, + pub fn open_recursive<'r, 'a: 'r, 'b: 'r, 'c: 'r>( + context: &'a FsContext, + cwd: &'b Arc, + path: Path<'c>, follow: bool, nrecur: u32, - ) -> KResult> { - // too many recursive search layers will cause stack overflow - // so we use 16 for now - if nrecur >= 16 { - return Err(ELOOP); - } + ) -> Pin>> + 'r>> { + Box::pin(async move { + // too many recursive search layers will cause stack overflow + // so we use 16 for now + if 
nrecur >= 16 { + return Err(ELOOP); + } - let mut cwd = if path.is_absolute() { - context.fsroot.clone() - } else { - cwd.clone() - }; + let mut cwd = if path.is_absolute() { + context.fsroot.clone() + } else { + cwd.clone() + }; - for item in path.iter() { - if let PathComponent::TrailingEmpty = item { - if cwd.data.load().as_ref().is_none() { - return Ok(cwd); + for item in path.iter() { + if let PathComponent::TrailingEmpty = item { + if cwd.data.load().as_ref().is_none() { + return Ok(cwd); + } } - } - cwd = Self::resolve_directory(context, cwd, nrecur)?; + cwd = Self::resolve_directory(context, cwd, nrecur).await?; - match item { - PathComponent::TrailingEmpty | PathComponent::Current => {} // pass - PathComponent::Parent => { - if !cwd.hash_eq(&context.fsroot) { - let parent = cwd.parent().clone(); - cwd = Self::resolve_directory(context, parent, nrecur)?; + match item { + PathComponent::TrailingEmpty | PathComponent::Current => {} // pass + PathComponent::Parent => { + if !cwd.hash_eq(&context.fsroot) { + let parent = cwd.parent().clone(); + cwd = Self::resolve_directory(context, parent, nrecur).await?; + } + continue; + } + PathComponent::Name(name) => { + cwd = cwd.find(name).await?; } - continue; - } - PathComponent::Name(name) => { - cwd = cwd.find(name)?; } } - } - if follow { - let data = cwd.data.load(); + if follow { + let data = cwd.data.load(); - if let Some(data) = data.as_ref() { - if data.flags & D_SYMLINK != 0 { - let data = cwd.data.load(); - let data = data.as_ref().unwrap(); - let mut buffer = [0u8; 256]; - let mut buffer = ByteBuffer::new(&mut buffer); + if let Some(data) = data.as_ref() { + if data.kind == DentryKind::Symlink { + let data = cwd.data.load(); + let data = data.as_ref().unwrap(); + let mut buffer = [0u8; 256]; + let mut buffer = ByteBuffer::new(&mut buffer); - data.inode.readlink(&mut buffer)?; - let path = Path::new(buffer.data())?; + data.inode.readlink(&mut buffer).await?; + let path = Path::new(buffer.data())?; - let parent = cwd.parent().clone(); - cwd = Self::open_recursive(context, &parent, path, true, nrecur + 1)?; + let parent = cwd.parent().clone(); + cwd = + Self::open_recursive(context, &parent, path, true, nrecur + 1).await?; + } } } - } - Ok(cwd) + Ok(cwd) + }) } - pub fn open(context: &FsContext, path: Path, follow_symlinks: bool) -> KResult> { + pub async fn open( + context: &FsContext, + path: Path<'_>, + follow_symlinks: bool, + ) -> KResult> { let cwd = context.cwd.lock().clone(); - Dentry::open_recursive(context, &cwd, path, follow_symlinks, 0) + Dentry::open_recursive(context, &cwd, path, follow_symlinks, 0).await } - pub fn open_at( + pub async fn open_at( context: &FsContext, at: &Arc, - path: Path, + path: Path<'_>, follow_symlinks: bool, ) -> KResult> { - Dentry::open_recursive(context, at, path, follow_symlinks, 0) + Dentry::open_recursive(context, at, path, follow_symlinks, 0).await } pub fn get_path( @@ -405,18 +413,18 @@ impl Dentry { } impl Dentry { - pub fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { + pub async fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { let inode = self.get_inode()?; // Safety: Changing mode alone will have no effect on the file's contents - match inode.mode.load().format() { - Mode::DIR => Err(EISDIR), - Mode::REG => inode.read(buffer, offset), - Mode::BLK => { + match inode.format() { + Format::DIR => Err(EISDIR), + Format::REG => inode.read(buffer, offset).await, + Format::BLK => { let device = BlockDevice::get(inode.devid()?)?; - 
Ok(device.read_some(offset, buffer)?.allow_partial()) + Ok(device.read_some(offset, buffer).await?.allow_partial()) } - Mode::CHR => { + Format::CHR => { let device = CharDevice::get(inode.devid()?).ok_or(EPERM)?; device.read(buffer) } @@ -424,32 +432,32 @@ impl Dentry { } } - pub fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { + pub async fn write(&self, stream: &mut dyn Stream, offset: WriteOffset<'_>) -> KResult { let inode = self.get_inode()?; // Safety: Changing mode alone will have no effect on the file's contents - match inode.mode.load().format() { - Mode::DIR => Err(EISDIR), - Mode::REG => inode.write(stream, offset), - Mode::BLK => Err(EINVAL), // TODO - Mode::CHR => CharDevice::get(inode.devid()?).ok_or(EPERM)?.write(stream), + match inode.format() { + Format::DIR => Err(EISDIR), + Format::REG => inode.write(stream, offset).await, + Format::BLK => Err(EINVAL), // TODO + Format::CHR => CharDevice::get(inode.devid()?).ok_or(EPERM)?.write(stream), _ => Err(EINVAL), } } - pub fn readdir(&self, offset: usize, mut callback: F) -> KResult + pub async fn readdir(&self, offset: usize, mut for_each_entry: F) -> KResult> where - F: FnMut(&[u8], Ino) -> KResult>, + F: FnMut(&[u8], Ino) -> KResult + Send, { let dir = self.get_inode()?; - dir.do_readdir(offset, &mut callback) + dir.readdir(offset, &mut for_each_entry).await } - pub fn mkdir(&self, mode: Mode) -> KResult<()> { + pub async fn mkdir(&self, perm: Permission) -> KResult<()> { if self.get_inode().is_ok() { Err(EEXIST) } else { let dir = self.parent().get_inode()?; - dir.mkdir(self, mode) + dir.mkdir(self, perm).await } } @@ -457,50 +465,50 @@ impl Dentry { self.get_inode()?.statx(stat, mask) } - pub fn truncate(&self, size: usize) -> KResult<()> { - self.get_inode()?.truncate(size) + pub async fn truncate(&self, size: usize) -> KResult<()> { + self.get_inode()?.truncate(size).await } - pub fn unlink(self: &Arc) -> KResult<()> { + pub async fn unlink(self: &Arc) -> KResult<()> { if self.get_inode().is_err() { Err(ENOENT) } else { let dir = self.parent().get_inode()?; - dir.unlink(self) + dir.unlink(self).await } } - pub fn symlink(self: &Arc, link: &[u8]) -> KResult<()> { + pub async fn symlink(self: &Arc, link: &[u8]) -> KResult<()> { if self.get_inode().is_ok() { Err(EEXIST) } else { let dir = self.parent().get_inode()?; - dir.symlink(self, link) + dir.symlink(self, link).await } } - pub fn readlink(&self, buffer: &mut dyn Buffer) -> KResult { - self.get_inode()?.readlink(buffer) + pub async fn readlink(&self, buffer: &mut dyn Buffer) -> KResult { + self.get_inode()?.readlink(buffer).await } - pub fn mknod(&self, mode: Mode, devid: DevId) -> KResult<()> { + pub async fn mknod(&self, mode: Mode, devid: DeviceId) -> KResult<()> { if self.get_inode().is_ok() { Err(EEXIST) } else { let dir = self.parent().get_inode()?; - dir.mknod(self, mode, devid) + dir.mknod(self, mode, devid).await } } - pub fn chmod(&self, mode: Mode) -> KResult<()> { - self.get_inode()?.chmod(mode) + pub async fn chmod(&self, mode: Mode) -> KResult<()> { + self.get_inode()?.chmod(mode).await } - pub fn chown(&self, uid: u32, gid: u32) -> KResult<()> { - self.get_inode()?.chown(uid, gid) + pub async fn chown(&self, uid: u32, gid: u32) -> KResult<()> { + self.get_inode()?.chown(uid, gid).await } - pub fn rename(self: &Arc, new: &Arc, flags: RenameFlags) -> KResult<()> { + pub async fn rename(self: &Arc, new: &Arc, flags: RenameFlags) -> KResult<()> { if Arc::ptr_eq(self, new) { return Ok(()); } @@ -509,22 +517,19 @@ impl Dentry { let 
new_parent = new.parent().get_inode()?;

         // If the two dentries are not in the same filesystem, return EXDEV.
-        if !Weak::ptr_eq(&old_parent.vfs, &new_parent.vfs) {
+        if !old_parent.sbref().eq(&new_parent.sbref()) {
             Err(PosixError::EXDEV)?;
         }

-        let vfs = old_parent.vfs.upgrade().ok_or(EIO)?;
-
         let rename_data = RenameData {
             old_dentry: self,
             new_dentry: new,
             new_parent,
-            vfs,
             is_exchange: flags.contains(RenameFlags::RENAME_EXCHANGE),
             no_replace: flags.contains(RenameFlags::RENAME_NOREPLACE),
         };

         // Delegate to the parent directory's rename implementation
-        old_parent.rename(rename_data)
+        old_parent.rename(rename_data).await
     }
 }
diff --git a/src/kernel/vfs/dentry/dcache.rs b/src/kernel/vfs/dentry/dcache.rs
index 188a1cfc..e2491235 100644
--- a/src/kernel/vfs/dentry/dcache.rs
+++ b/src/kernel/vfs/dentry/dcache.rs
@@ -1,7 +1,5 @@
-use super::{Dentry, Inode};
+use super::Dentry;
 use crate::kernel::constants::ENOENT;
-use crate::kernel::task::block_on;
-use crate::kernel::vfs::inode::Mode;
 use crate::rcu::RCUPointer;
 use crate::{
     prelude::*,
@@ -41,27 +39,14 @@ pub fn d_find_fast(dentry: &Dentry) -> Option<Arc<Dentry>> {
 /// Call `lookup()` on the parent inode to try find if the dentry points to a valid inode
 ///
 /// Silently fail without any side effects
-pub fn d_try_revalidate(dentry: &Arc<Dentry>) {
-    let _lock = block_on(D_EXCHANGE_LOCK.lock());
-
-    (|| -> KResult<()> {
-        let parent = dentry.parent().get_inode()?;
-        let inode = parent.lookup(dentry)?.ok_or(ENOENT)?;
+pub async fn d_try_revalidate(dentry: &Arc<Dentry>) -> KResult<()> {
+    let _lock = D_EXCHANGE_LOCK.lock().await;

-        d_save(dentry, inode)
-    })()
-    .unwrap_or_default();
-}
+    let parent = dentry.parent().get_inode()?;
+    let inode = parent.lookup(dentry).await?.ok_or(ENOENT)?;

-/// Save the inode to the dentry.
-///
-/// Dentry flags will be determined by the inode's mode.
-pub fn d_save(dentry: &Arc<Dentry>, inode: Arc<dyn Inode>) -> KResult<()> {
-    match inode.mode.load().format() {
-        Mode::DIR => dentry.save_dir(inode),
-        Mode::LNK => dentry.save_symlink(inode),
-        _ => dentry.save_reg(inode),
-    }
+    dentry.fill(inode);
+    Ok(())
 }

 /// Replace the old dentry with the new one in the dcache
diff --git a/src/kernel/vfs/file/inode_file.rs b/src/kernel/vfs/file/inode_file.rs
index 6386ba92..96526ee9 100644
--- a/src/kernel/vfs/file/inode_file.rs
+++ b/src/kernel/vfs/file/inode_file.rs
@@ -5,13 +5,13 @@ use crate::{
         constants::{EBADF, EFAULT, ENOTDIR, EOVERFLOW, ESPIPE},
         vfs::{
             dentry::Dentry,
-            inode::{Inode, Mode, WriteOffset},
+            inode::{Inode, InodeUse, WriteOffset},
+            types::Format,
         },
     },
     prelude::KResult,
 };
 use alloc::sync::Arc;
-use core::{ops::ControlFlow, sync::atomic::Ordering};
 use eonix_sync::Mutex;
 use posix_types::{
     getdent::{UserDirent, UserDirent64},
@@ -25,7 +25,7 @@ pub struct InodeFile {
     pub a: bool,
     /// Only a few modes those won't possibly change are cached here to speed up file operations.
     /// Specifically, `S_IFMT` masked bits.
-    pub mode: Mode,
+    pub format: Format,
     cursor: Mutex<usize>,
     dentry: Arc<Dentry>,
 }
@@ -34,12 +34,7 @@ impl InodeFile {
     pub fn new(dentry: Arc<Dentry>, flags: OpenFlags) -> File {
         // SAFETY: `dentry` used to create `InodeFile` is valid.
         // SAFETY: `mode` should never change with respect to the `S_IFMT` fields.
-        let cached_mode = dentry
-            .get_inode()
-            .expect("`dentry` is invalid")
-            .mode
-            .load()
-            .format();
+        let format = dentry.inode().expect("dentry should be valid").format();

         let (r, w, a) = flags.as_rwa();

@@ -50,15 +45,15 @@ impl InodeFile {
                 r,
                 w,
                 a,
-                mode: cached_mode,
+                format,
                 cursor: Mutex::new(0),
             }),
         )
     }

     pub fn sendfile_check(&self) -> KResult<()> {
-        match self.mode {
-            Mode::REG | Mode::BLK => Ok(()),
+        match self.format {
+            Format::REG | Format::BLK => Ok(()),
             _ => Err(EBADF),
         }
     }
@@ -70,21 +65,19 @@ impl InodeFile {

         let mut cursor = self.cursor.lock().await;

-        if self.a {
-            let nwrote = self.dentry.write(stream, WriteOffset::End(&mut cursor))?;
+        let (offset, update_offset) = match (self.a, offset) {
+            (true, _) => (WriteOffset::End(&mut cursor), None),
+            (false, Some(offset)) => (WriteOffset::Position(offset), None),
+            (false, None) => (WriteOffset::Position(*cursor), Some(&mut *cursor)),
+        };

-            Ok(nwrote)
-        } else {
-            let nwrote = if let Some(offset) = offset {
-                self.dentry.write(stream, WriteOffset::Position(offset))?
-            } else {
-                let nwrote = self.dentry.write(stream, WriteOffset::Position(*cursor))?;
-                *cursor += nwrote;
-                nwrote
-            };
-
-            Ok(nwrote)
+        let nr_write = self.dentry.write(stream, offset).await?;
+
+        if let Some(update_offset) = update_offset {
+            *update_offset += nr_write;
         }
+
+        Ok(nr_write)
     }

     pub async fn read(&self, buffer: &mut dyn Buffer, offset: Option<usize>) -> KResult<usize> {
         if !self.r {
             return Err(EBADF);
         }

-        let nread = if let Some(offset) = offset {
-            let nread = self.dentry.read(buffer, offset)?;
-            nread
-        } else {
-            let mut cursor = self.cursor.lock().await;
-
-            let nread = self.dentry.read(buffer, *cursor)?;
+        if let Some(offset) = offset {
+            return Ok(self.dentry.read(buffer, offset).await?);
+        }

-            *cursor += nread;
-            nread
-        };
+        let mut cursor = self.cursor.lock().await;
+        let nread = self.dentry.read(buffer, *cursor).await?;
+        *cursor += nread;

         Ok(nread)
     }
 }

 impl File {
-    pub fn get_inode(&self) -> KResult<Option<Arc<dyn Inode>>> {
+    pub fn get_inode(&self) -> KResult<Option<InodeUse<dyn Inode>>> {
         if let FileType::Inode(inode_file) = &**self {
             Ok(Some(inode_file.dentry.get_inode()?))
         } else {
@@ -124,27 +113,30 @@ impl File {

         let mut cursor = inode_file.cursor.lock().await;

-        let nread = inode_file.dentry.readdir(*cursor, |filename, ino| {
-            // + 1 for filename length padding '\0', + 1 for d_type.
-            let real_record_len = core::mem::size_of::<UserDirent>() + filename.len() + 2;
+        let nread = inode_file
+            .dentry
+            .readdir(*cursor, |filename, ino| {
+                // + 1 for filename length padding '\0', + 1 for d_type.
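+                // The legacy getdents record carries no d_type field in its
+                // header; userspace expects it in the very last byte of each
+                // record, after the NUL-terminated name, which is why two
+                // extra bytes are reserved past the name here.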
+ let real_record_len = core::mem::size_of::() + filename.len() + 2; - if buffer.available() < real_record_len { - return Ok(ControlFlow::Break(())); - } + if buffer.available() < real_record_len { + return Ok(false); + } - let record = UserDirent { - d_ino: ino as u32, - d_off: 0, - d_reclen: real_record_len as u16, - d_name: [0; 0], - }; + let record = UserDirent { + d_ino: ino.as_raw() as u32, + d_off: 0, + d_reclen: real_record_len as u16, + d_name: [0; 0], + }; - buffer.copy(&record)?.ok_or(EFAULT)?; - buffer.fill(filename)?.ok_or(EFAULT)?; - buffer.fill(&[0, 0])?.ok_or(EFAULT)?; + buffer.copy(&record)?.ok_or(EFAULT)?; + buffer.fill(filename)?.ok_or(EFAULT)?; + buffer.fill(&[0, 0])?.ok_or(EFAULT)?; - Ok(ControlFlow::Continue(())) - })?; + Ok(true) + }) + .await??; *cursor += nread; Ok(()) @@ -157,28 +149,31 @@ impl File { let mut cursor = inode_file.cursor.lock().await; - let nread = inode_file.dentry.readdir(*cursor, |filename, ino| { - // Filename length + 1 for padding '\0' - let real_record_len = core::mem::size_of::() + filename.len() + 1; + let nread = inode_file + .dentry + .readdir(*cursor, |filename, ino| { + // Filename length + 1 for padding '\0' + let real_record_len = core::mem::size_of::() + filename.len() + 1; - if buffer.available() < real_record_len { - return Ok(ControlFlow::Break(())); - } + if buffer.available() < real_record_len { + return Ok(false); + } - let record = UserDirent64 { - d_ino: ino, - d_off: 0, - d_reclen: real_record_len as u16, - d_type: 0, - d_name: [0; 0], - }; + let record = UserDirent64 { + d_ino: ino.as_raw(), + d_off: 0, + d_reclen: real_record_len as u16, + d_type: 0, + d_name: [0; 0], + }; - buffer.copy(&record)?.ok_or(EFAULT)?; - buffer.fill(filename)?.ok_or(EFAULT)?; - buffer.fill(&[0])?.ok_or(EFAULT)?; + buffer.copy(&record)?.ok_or(EFAULT)?; + buffer.fill(filename)?.ok_or(EFAULT)?; + buffer.fill(&[0])?.ok_or(EFAULT)?; - Ok(ControlFlow::Continue(())) - })?; + Ok(true) + }) + .await??; *cursor += nread; Ok(()) @@ -196,7 +191,7 @@ impl File { SeekOption::Set(n) => n, SeekOption::End(off) => { let inode = inode_file.dentry.get_inode()?; - let size = inode.size.load(Ordering::Relaxed) as usize; + let size = inode.info().lock().size as usize; size.checked_add_signed(off).ok_or(EOVERFLOW)? 
} }; diff --git a/src/kernel/vfs/filearray.rs b/src/kernel/vfs/filearray.rs index b457a425..1862a3e1 100644 --- a/src/kernel/vfs/filearray.rs +++ b/src/kernel/vfs/filearray.rs @@ -1,6 +1,6 @@ use super::{ file::{File, InodeFile, Pipe}, - inode::Mode, + types::{Format, Permission}, Spin, TerminalFile, }; use crate::kernel::{ @@ -280,26 +280,31 @@ impl FileArray { Ok((read_fd, write_fd)) } - pub fn open(&self, dentry: &Arc, flags: OpenFlags, mode: Mode) -> KResult { - dentry.open_check(flags, mode)?; + pub async fn open( + &self, + dentry: &Arc, + flags: OpenFlags, + perm: Permission, + ) -> KResult { + dentry.open_check(flags, perm).await?; let fdflag = flags.as_fd_flags(); let inode = dentry.get_inode()?; - let file_format = inode.mode.load().format(); + let file_format = inode.format(); match (flags.directory(), file_format, flags.write()) { - (true, Mode::DIR, _) => {} + (true, Format::DIR, _) => {} (true, _, _) => return Err(ENOTDIR), - (false, Mode::DIR, true) => return Err(EISDIR), + (false, Format::DIR, true) => return Err(EISDIR), _ => {} } - if flags.truncate() && flags.write() && file_format.is_reg() { - inode.truncate(0)?; + if flags.truncate() && flags.write() && file_format == Format::REG { + inode.truncate(0).await?; } - let file = if file_format.is_chr() { + let file = if file_format == Format::CHR { let device = CharDevice::get(inode.devid()?).ok_or(ENXIO)?; device.open(flags)? } else { diff --git a/src/kernel/vfs/inode.rs b/src/kernel/vfs/inode.rs deleted file mode 100644 index 52529f84..00000000 --- a/src/kernel/vfs/inode.rs +++ /dev/null @@ -1,494 +0,0 @@ -use super::{dentry::Dentry, vfs::Vfs, DevId}; -use crate::io::Stream; -use crate::kernel::constants::{ - EINVAL, EISDIR, ENOTDIR, EPERM, STATX_ATIME, STATX_BLOCKS, STATX_CTIME, STATX_GID, STATX_INO, - STATX_MODE, STATX_MTIME, STATX_NLINK, STATX_SIZE, STATX_TYPE, STATX_UID, S_IFBLK, S_IFCHR, - S_IFDIR, S_IFLNK, S_IFMT, S_IFREG, -}; -use crate::kernel::mem::PageCache; -use crate::kernel::syscall::{FromSyscallArg, SyscallRetVal}; -use crate::kernel::task::block_on; -use crate::kernel::timer::Instant; -use crate::{io::Buffer, prelude::*}; -use alloc::sync::{Arc, Weak}; -use core::{ - mem::MaybeUninit, - ops::ControlFlow, - ptr::addr_of_mut, - sync::atomic::{AtomicU32, AtomicU64, Ordering}, -}; -use eonix_sync::RwLock; -use posix_types::stat::StatX; - -pub type Ino = u64; -pub type AtomicIno = AtomicU64; -#[allow(dead_code)] -pub type ISize = u64; -pub type AtomicISize = AtomicU64; -#[allow(dead_code)] -pub type Nlink = u64; -pub type AtomicNlink = AtomicU64; -#[allow(dead_code)] -pub type Uid = u32; -pub type AtomicUid = AtomicU32; -#[allow(dead_code)] -pub type Gid = u32; -pub type AtomicGid = AtomicU32; - -#[derive(Clone, Copy, PartialEq, Eq)] -pub struct Mode(u32); - -pub struct AtomicMode(AtomicU32); - -#[derive(Debug)] -pub struct InodeData { - pub ino: Ino, - pub size: AtomicISize, - pub nlink: AtomicNlink, - - pub uid: AtomicUid, - pub gid: AtomicGid, - pub mode: AtomicMode, - - pub atime: Spin, - pub ctime: Spin, - pub mtime: Spin, - - pub rwsem: RwLock<()>, - - pub vfs: Weak, -} - -impl InodeData { - pub fn new(ino: Ino, vfs: Weak) -> Self { - Self { - ino, - vfs, - atime: Spin::new(Instant::now()), - ctime: Spin::new(Instant::now()), - mtime: Spin::new(Instant::now()), - rwsem: RwLock::new(()), - size: AtomicU64::new(0), - nlink: AtomicNlink::new(0), - uid: AtomicUid::new(0), - gid: AtomicGid::new(0), - mode: AtomicMode::new(0), - } - } -} - -#[allow(dead_code)] -pub trait InodeInner: - Send + Sync + 
core::ops::Deref + core::ops::DerefMut -{ - fn data(&self) -> &InodeData; - fn data_mut(&mut self) -> &mut InodeData; -} - -pub enum WriteOffset<'end> { - Position(usize), - End(&'end mut usize), -} - -pub struct RenameData<'a, 'b> { - pub old_dentry: &'a Arc, - pub new_dentry: &'b Arc, - pub new_parent: Arc, - pub vfs: Arc, - pub is_exchange: bool, - pub no_replace: bool, -} - -#[allow(unused_variables)] -pub trait Inode: Send + Sync + InodeInner + Any { - fn is_dir(&self) -> bool { - self.mode.load().is_dir() - } - - fn lookup(&self, dentry: &Arc) -> KResult>> { - Err(if !self.is_dir() { ENOTDIR } else { EPERM }) - } - - fn creat(&self, at: &Arc, mode: Mode) -> KResult<()> { - Err(if !self.is_dir() { ENOTDIR } else { EPERM }) - } - - fn mkdir(&self, at: &Dentry, mode: Mode) -> KResult<()> { - Err(if !self.is_dir() { ENOTDIR } else { EPERM }) - } - - fn mknod(&self, at: &Dentry, mode: Mode, dev: DevId) -> KResult<()> { - Err(if !self.is_dir() { ENOTDIR } else { EPERM }) - } - - fn unlink(&self, at: &Arc) -> KResult<()> { - Err(if !self.is_dir() { ENOTDIR } else { EPERM }) - } - - fn symlink(&self, at: &Arc, target: &[u8]) -> KResult<()> { - Err(if !self.is_dir() { ENOTDIR } else { EPERM }) - } - - fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - Err(if self.is_dir() { EISDIR } else { EINVAL }) - } - - fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - Err(if self.is_dir() { EISDIR } else { EINVAL }) - } - - fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult { - Err(if self.is_dir() { EISDIR } else { EINVAL }) - } - - fn write_direct(&self, stream: &mut dyn Stream, offset: usize) -> KResult { - Err(if self.is_dir() { EISDIR } else { EINVAL }) - } - - fn devid(&self) -> KResult { - Err(if self.is_dir() { EISDIR } else { EINVAL }) - } - - fn readlink(&self, buffer: &mut dyn Buffer) -> KResult { - Err(if self.is_dir() { EISDIR } else { EINVAL }) - } - - fn truncate(&self, length: usize) -> KResult<()> { - Err(if self.is_dir() { EISDIR } else { EPERM }) - } - - fn rename(&self, rename_data: RenameData) -> KResult<()> { - Err(if !self.is_dir() { ENOTDIR } else { EPERM }) - } - - fn do_readdir( - &self, - offset: usize, - callback: &mut dyn FnMut(&[u8], Ino) -> KResult>, - ) -> KResult { - Err(if !self.is_dir() { ENOTDIR } else { EPERM }) - } - - fn chmod(&self, mode: Mode) -> KResult<()> { - Err(EPERM) - } - - fn chown(&self, uid: u32, gid: u32) -> KResult<()> { - Err(EPERM) - } - - fn page_cache(&self) -> Option<&PageCache> { - None - } - - fn statx(&self, stat: &mut StatX, mask: u32) -> KResult<()> { - // Safety: ffi should have checked reference - let vfs = self.vfs.upgrade().expect("Vfs is dropped"); - - let size = self.size.load(Ordering::Relaxed); - let mode = self.mode.load(); - - if mask & STATX_NLINK != 0 { - stat.stx_nlink = self.nlink.load(Ordering::Acquire) as _; - stat.stx_mask |= STATX_NLINK; - } - - if mask & STATX_ATIME != 0 { - let atime = *self.atime.lock(); - stat.stx_atime = atime.into(); - stat.stx_mask |= STATX_ATIME; - } - - if mask & STATX_MTIME != 0 { - let mtime = *self.mtime.lock(); - stat.stx_mtime = mtime.into(); - stat.stx_mask |= STATX_MTIME; - } - - if mask & STATX_CTIME != 0 { - let ctime = *self.ctime.lock(); - stat.stx_ctime = ctime.into(); - stat.stx_mask |= STATX_CTIME; - } - - if mask & STATX_SIZE != 0 { - stat.stx_size = self.size.load(Ordering::Relaxed) as _; - stat.stx_mask |= STATX_SIZE; - } - - stat.stx_mode = 0; - if mask & STATX_MODE != 0 { - stat.stx_mode |= mode.non_format_bits() 
as u16; - stat.stx_mask |= STATX_MODE; - } - - if mask & STATX_TYPE != 0 { - stat.stx_mode |= mode.format_bits() as u16; - if mode.is_blk() || mode.is_chr() { - let devid = self.devid(); - stat.stx_rdev_major = (devid? >> 8) & 0xff; - stat.stx_rdev_minor = devid? & 0xff; - } - stat.stx_mask |= STATX_TYPE; - } - - if mask & STATX_INO != 0 { - stat.stx_ino = self.ino as _; - stat.stx_mask |= STATX_INO; - } - - if mask & STATX_BLOCKS != 0 { - stat.stx_blocks = (size + 512 - 1) / 512; - stat.stx_blksize = vfs.io_blksize() as _; - stat.stx_mask |= STATX_BLOCKS; - } - - if mask & STATX_UID != 0 { - stat.stx_uid = self.uid.load(Ordering::Relaxed) as _; - stat.stx_mask |= STATX_UID; - } - - if mask & STATX_GID != 0 { - stat.stx_gid = self.gid.load(Ordering::Relaxed) as _; - stat.stx_mask |= STATX_GID; - } - - let fsdev = vfs.fs_devid(); - stat.stx_dev_major = (fsdev >> 8) & 0xff; - stat.stx_dev_minor = fsdev & 0xff; - - // TODO: support more attributes - stat.stx_attributes_mask = 0; - - Ok(()) - } - - fn new_locked(ino: Ino, vfs: Weak, f: F) -> Arc - where - Self: Sized, - F: FnOnce(*mut Self, &()), - { - let mut uninit = Arc::::new_uninit(); - - let uninit_mut = Arc::get_mut(&mut uninit).unwrap(); - - // Safety: `idata` is owned by `uninit` - let idata = unsafe { - addr_of_mut!(*(*uninit_mut.as_mut_ptr()).data_mut()) - .cast::>() - .as_mut() - .unwrap() - }; - - idata.write(InodeData::new(ino, vfs)); - - f( - uninit_mut.as_mut_ptr(), - // SAFETY: `idata` is initialized and we will never move the lock. - &block_on(unsafe { idata.assume_init_ref() }.rwsem.read()), - ); - - // Safety: `uninit` is initialized - unsafe { uninit.assume_init() } - } -} - -// TODO: define multiple inode structs a time -macro_rules! define_struct_inode { - ($v:vis struct $inode_t:ident;) => { - $v struct $inode_t { - /// Do not use this directly - idata: $crate::kernel::vfs::inode::InodeData, - } - - impl core::ops::Deref for $inode_t { - type Target = $crate::kernel::vfs::inode::InodeData; - - fn deref(&self) -> &Self::Target { - &self.idata - } - } - - impl core::ops::DerefMut for $inode_t { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.idata - } - } - - impl $crate::kernel::vfs::inode::InodeInner for $inode_t { - fn data(&self) -> &$crate::kernel::vfs::inode::InodeData { - &self.idata - } - - fn data_mut(&mut self) -> &mut $crate::kernel::vfs::inode::InodeData { - &mut self.idata - } - } - }; - ($v:vis struct $inode_t:ident { $($vis:vis $name:ident: $type:ty,)* }) => { - $v struct $inode_t { - /// Do not use this directly - idata: $crate::kernel::vfs::inode::InodeData, - $($vis $name: $type,)* - } - - impl core::ops::Deref for $inode_t { - type Target = $crate::kernel::vfs::inode::InodeData; - - fn deref(&self) -> &Self::Target { - &self.idata - } - } - - impl core::ops::DerefMut for $inode_t { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.idata - } - } - - impl $crate::kernel::vfs::inode::InodeInner for $inode_t { - fn data(&self) -> &$crate::kernel::vfs::inode::InodeData { - &self.idata - } - - fn data_mut(&mut self) -> &mut $crate::kernel::vfs::inode::InodeData { - &mut self.idata - } - } - }; -} - -pub(crate) use define_struct_inode; - -impl Mode { - pub const REG: Self = Self(S_IFREG); - pub const DIR: Self = Self(S_IFDIR); - pub const LNK: Self = Self(S_IFLNK); - pub const BLK: Self = Self(S_IFBLK); - pub const CHR: Self = Self(S_IFCHR); - - pub const fn new(bits: u32) -> Self { - Self(bits) - } - - pub const fn is_blk(&self) -> bool { - (self.0 & S_IFMT) == S_IFBLK - } - - pub 
const fn is_chr(&self) -> bool { - (self.0 & S_IFMT) == S_IFCHR - } - - pub const fn is_reg(&self) -> bool { - (self.0 & S_IFMT) == S_IFREG - } - - pub const fn is_dir(&self) -> bool { - (self.0 & S_IFMT) == S_IFDIR - } - - pub const fn is_lnk(&self) -> bool { - (self.0 & S_IFMT) == S_IFLNK - } - - pub const fn bits(&self) -> u32 { - self.0 - } - - pub const fn format_bits(&self) -> u32 { - self.0 & S_IFMT - } - - pub const fn format(&self) -> Self { - Self::new(self.format_bits()) - } - - pub const fn non_format_bits(&self) -> u32 { - self.0 & !S_IFMT - } - - pub const fn non_format(&self) -> Self { - Self::new(self.non_format_bits()) - } - - pub const fn perm(self, perm: u32) -> Self { - Self::new((self.0 & !0o777) | (perm & 0o777)) - } - - pub const fn set_perm(&mut self, perm: u32) { - *self = self.perm(perm); - } - - pub const fn mask_perm(&mut self, perm_mask: u32) { - let perm_mask = perm_mask & 0o777; - let self_perm = self.non_format_bits() & 0o777; - - *self = self.perm(self_perm & perm_mask); - } -} - -impl AtomicMode { - pub const fn new(bits: u32) -> Self { - Self(AtomicU32::new(bits)) - } - - pub const fn from(mode: Mode) -> Self { - Self::new(mode.0) - } - - pub fn load(&self) -> Mode { - Mode(self.0.load(Ordering::Relaxed)) - } - - pub fn store(&self, mode: Mode) { - self.0.store(mode.0, Ordering::Relaxed); - } -} - -impl core::fmt::Debug for AtomicMode { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - f.debug_struct("AtomicMode") - .field("bits", &self.load().0) - .finish() - } -} - -impl core::fmt::Debug for Mode { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - let format_name = match self.format() { - Mode::REG => "REG", - Mode::DIR => "DIR", - Mode::LNK => "LNK", - Mode::BLK => "BLK", - Mode::CHR => "CHR", - _ => "UNK", - }; - - match self.non_format_bits() & !0o777 { - 0 => write!( - f, - "Mode({format_name}, {perm:#o})", - perm = self.non_format_bits() - )?, - rem => write!( - f, - "Mode({format_name}, {perm:#o}, rem={rem:#x})", - perm = self.non_format_bits() & 0o777 - )?, - } - - Ok(()) - } -} - -impl FromSyscallArg for Mode { - fn from_arg(value: usize) -> Self { - Mode::new(value as u32) - } -} - -impl SyscallRetVal for Mode { - fn into_retval(self) -> Option { - Some(self.bits() as usize) - } -} diff --git a/src/kernel/vfs/inode/ino.rs b/src/kernel/vfs/inode/ino.rs new file mode 100644 index 00000000..b5ee7ac0 --- /dev/null +++ b/src/kernel/vfs/inode/ino.rs @@ -0,0 +1,31 @@ +use core::{ + fmt::{Debug, Display, Formatter}, + sync::atomic::AtomicU64, +}; + +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Ino(u64); + +pub struct AtomicIno(AtomicU64); + +impl Ino { + pub const fn new(ino: u64) -> Self { + Self(ino) + } + + pub const fn as_raw(self) -> u64 { + self.0 + } +} + +impl Debug for Ino { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + write!(f, "Ino({})", self.0) + } +} + +impl Display for Ino { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + write!(f, "{:?}", self) + } +} diff --git a/src/kernel/vfs/inode/inode.rs b/src/kernel/vfs/inode/inode.rs new file mode 100644 index 00000000..786a31fe --- /dev/null +++ b/src/kernel/vfs/inode/inode.rs @@ -0,0 +1,389 @@ +use alloc::boxed::Box; +use core::{ + any::Any, + future::Future, + marker::Unsize, + ops::{CoerceUnsized, Deref}, + pin::Pin, +}; +use eonix_sync::Spin; + +use alloc::sync::{Arc, Weak}; +use async_trait::async_trait; + +use crate::{ + io::{Buffer, Stream}, + kernel::{ + constants::{EINVAL, 
EPERM}, + mem::PageCache, + timer::Instant, + vfs::{ + dentry::Dentry, + types::{DeviceId, Format, Mode, Permission}, + SbRef, SbUse, SuperBlock, + }, + }, + prelude::KResult, +}; + +use super::{Ino, RenameData, WriteOffset}; + +pub trait InodeOps: Sized + Send + Sync + 'static { + type SuperBlock: SuperBlock + Sized; + + fn ino(&self) -> Ino; + fn format(&self) -> Format; + fn info(&self) -> &Spin; + + fn super_block(&self) -> &SbRef; + + fn page_cache(&self) -> Option<&PageCache>; +} + +#[allow(unused_variables)] +pub trait InodeDirOps: InodeOps { + fn lookup( + &self, + dentry: &Arc, + ) -> impl Future>>> + Send { + async { Err(EPERM) } + } + + /// Read directory entries and call the given closure for each entry. + /// + /// # Returns + /// - Ok(count): The number of entries read. + /// - Ok(Err(err)): Some error occurred while calling the given closure. + /// - Err(err): An error occurred while reading the directory. + fn readdir<'r, 'a: 'r, 'b: 'r>( + &'a self, + offset: usize, + for_each_entry: &'b mut (dyn FnMut(&[u8], Ino) -> KResult + Send), + ) -> impl Future>> + Send + 'r { + async { Err(EPERM) } + } + + fn create( + &self, + at: &Arc, + mode: Permission, + ) -> impl Future> + Send { + async { Err(EPERM) } + } + + fn mkdir(&self, at: &Dentry, mode: Permission) -> impl Future> + Send { + async { Err(EPERM) } + } + + fn mknod( + &self, + at: &Dentry, + mode: Mode, + dev: DeviceId, + ) -> impl Future> + Send { + async { Err(EPERM) } + } + + fn unlink(&self, at: &Arc) -> impl Future> + Send { + async { Err(EPERM) } + } + + fn symlink(&self, at: &Arc, target: &[u8]) -> impl Future> + Send { + async { Err(EPERM) } + } + + fn rename(&self, rename_data: RenameData<'_, '_>) -> impl Future> + Send { + async { Err(EPERM) } + } +} + +#[allow(unused_variables)] +pub trait InodeFileOps: InodeOps { + fn read( + &self, + buffer: &mut dyn Buffer, + offset: usize, + ) -> impl Future> + Send { + async { Err(EINVAL) } + } + + fn read_direct( + &self, + buffer: &mut dyn Buffer, + offset: usize, + ) -> impl Future> + Send { + async { Err(EINVAL) } + } + + fn write( + &self, + stream: &mut dyn Stream, + offset: WriteOffset<'_>, + ) -> impl Future> + Send { + async { Err(EINVAL) } + } + + fn write_direct( + &self, + stream: &mut dyn Stream, + offset: usize, + ) -> impl Future> + Send { + async { Err(EINVAL) } + } + + fn devid(&self) -> KResult { + Err(EINVAL) + } + + fn readlink(&self, buffer: &mut dyn Buffer) -> impl Future> + Send { + async { Err(EINVAL) } + } + + fn truncate(&self, length: usize) -> impl Future> + Send { + async { Err(EPERM) } + } + + fn chmod(&self, perm: Permission) -> impl Future> + Send { + async { Err(EPERM) } + } + + fn chown(&self, uid: u32, gid: u32) -> impl Future> + Send { + async { Err(EPERM) } + } +} + +#[async_trait] +pub trait InodeDir { + async fn lookup(&self, dentry: &Arc) -> KResult>>; + async fn create(&self, at: &Arc, perm: Permission) -> KResult<()>; + async fn mkdir(&self, at: &Dentry, perm: Permission) -> KResult<()>; + async fn mknod(&self, at: &Dentry, mode: Mode, dev: DeviceId) -> KResult<()>; + async fn unlink(&self, at: &Arc) -> KResult<()>; + async fn symlink(&self, at: &Arc, target: &[u8]) -> KResult<()>; + async fn rename(&self, rename_data: RenameData<'_, '_>) -> KResult<()>; + + fn readdir<'r, 'a: 'r, 'b: 'r>( + &'a self, + offset: usize, + callback: &'b mut (dyn FnMut(&[u8], Ino) -> KResult + Send), + ) -> Pin>> + Send + 'r>>; +} + +#[async_trait] +pub trait InodeFile { + async fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult; + 
async fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize>;
+    async fn write(&self, stream: &mut dyn Stream, offset: WriteOffset<'_>) -> KResult<usize>;
+    async fn write_direct(&self, stream: &mut dyn Stream, offset: usize) -> KResult<usize>;
+    fn devid(&self) -> KResult<DeviceId>;
+    async fn readlink(&self, buffer: &mut dyn Buffer) -> KResult<usize>;
+    async fn truncate(&self, length: usize) -> KResult<()>;
+    async fn chmod(&self, mode: Mode) -> KResult<()>;
+    async fn chown(&self, uid: u32, gid: u32) -> KResult<()>;
+}
+
+pub trait Inode: InodeFile + InodeDir + Any + Send + Sync + 'static {
+    fn ino(&self) -> Ino;
+    fn format(&self) -> Format;
+    fn info(&self) -> &Spin<InodeInfo>;
+
+    // TODO: This might need to be removed... Temporary workaround for now.
+    fn page_cache(&self) -> Option<&PageCache>;
+
+    fn sbref(&self) -> SbRef<dyn SuperBlock>;
+    fn sbget(&self) -> KResult<SbUse<dyn SuperBlock>>;
+}
+
+#[async_trait]
+impl<T> InodeFile for T
+where
+    T: InodeFileOps,
+{
+    async fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
+        self.read(buffer, offset).await
+    }
+
+    async fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
+        self.read_direct(buffer, offset).await
+    }
+
+    async fn write(&self, stream: &mut dyn Stream, offset: WriteOffset<'_>) -> KResult<usize> {
+        self.write(stream, offset).await
+    }
+
+    async fn write_direct(&self, stream: &mut dyn Stream, offset: usize) -> KResult<usize> {
+        self.write_direct(stream, offset).await
+    }
+
+    fn devid(&self) -> KResult<DeviceId> {
+        self.devid()
+    }
+
+    async fn readlink(&self, buffer: &mut dyn Buffer) -> KResult<usize> {
+        self.readlink(buffer).await
+    }
+
+    async fn truncate(&self, length: usize) -> KResult<()> {
+        self.truncate(length).await
+    }
+
+    async fn chmod(&self, mode: Mode) -> KResult<()> {
+        self.chmod(Permission::new(mode.non_format_bits())).await
+    }
+
+    async fn chown(&self, uid: u32, gid: u32) -> KResult<()> {
+        self.chown(uid, gid).await
+    }
+}
+
+#[async_trait]
+impl<T> InodeDir for T
+where
+    T: InodeDirOps,
+{
+    async fn lookup(&self, dentry: &Arc<Dentry>) -> KResult<Option<InodeUse<dyn Inode>>> {
+        self.lookup(dentry).await
+    }
+
+    async fn create(&self, at: &Arc<Dentry>, perm: Permission) -> KResult<()> {
+        self.create(at, perm).await
+    }
+
+    async fn mkdir(&self, at: &Dentry, perm: Permission) -> KResult<()> {
+        self.mkdir(at, perm).await
+    }
+
+    async fn mknod(&self, at: &Dentry, mode: Mode, dev: DeviceId) -> KResult<()> {
+        self.mknod(at, mode, dev).await
+    }
+
+    async fn unlink(&self, at: &Arc<Dentry>) -> KResult<()> {
+        self.unlink(at).await
+    }
+
+    async fn symlink(&self, at: &Arc<Dentry>, target: &[u8]) -> KResult<()> {
+        self.symlink(at, target).await
+    }
+
+    async fn rename(&self, rename_data: RenameData<'_, '_>) -> KResult<()> {
+        self.rename(rename_data).await
+    }
+
+    fn readdir<'r, 'a: 'r, 'b: 'r>(
+        &'a self,
+        offset: usize,
+        callback: &'b mut (dyn FnMut(&[u8], Ino) -> KResult<bool> + Send),
+    ) -> Pin<Box<dyn Future<Output = KResult<KResult<usize>>> + Send + 'r>> {
+        Box::pin(self.readdir(offset, callback))
+    }
+}
+
+impl<T> Inode for T
+where
+    T: InodeOps + InodeFile + InodeDir,
+{
+    fn ino(&self) -> Ino {
+        self.ino()
+    }
+
+    fn format(&self) -> Format {
+        self.format()
+    }
+
+    fn info(&self) -> &Spin<InodeInfo> {
+        self.info()
+    }
+
+    fn page_cache(&self) -> Option<&PageCache> {
+        self.page_cache()
+    }
+
+    fn sbref(&self) -> SbRef<dyn SuperBlock> {
+        self.super_block().clone()
+    }
+
+    fn sbget(&self) -> KResult<SbUse<dyn SuperBlock>> {
+        self.super_block().get().map(|sb| sb as _)
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct InodeInfo {
+    pub size: u64,
+    pub nlink: u64,
+
+    pub uid: u32,
+    pub gid: u32,
+    pub perm: Permission,
+
+    pub atime: Instant,
+    pub ctime: Instant,
+    pub mtime: Instant,
+}
+
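+// Usage sketch (illustrative only, not part of the API): implementors hand
+// out `InodeInfo` behind the `Spin` returned by `InodeOps::info()` and
+// mutate it under the lock. A hypothetical timestamp-update helper:
+//
+//     fn touch(inode: &impl InodeOps, now: Instant) {
+//         let mut info = inode.info().lock();
+//         info.mtime = now;
+//         info.ctime = now;
+//     }
+//
+// Critical sections should stay this short, since the spinlock must not be
+// held across await points.
+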
+#[derive(Clone)] +pub struct InodeRef(Weak) +where + I: Inode + ?Sized; + +pub struct InodeUse(Arc) +where + I: Inode + ?Sized; + +impl InodeUse +where + I: Inode, +{ + pub fn new(inode: I) -> Self { + Self(Arc::new(inode)) + } + + pub fn new_cyclic(inode_func: impl FnOnce(&Weak) -> I) -> Self { + Self(Arc::new_cyclic(inode_func)) + } +} + +impl InodeUse +where + I: Inode + ?Sized, +{ + pub fn as_raw(&self) -> *const I { + Arc::as_ptr(&self.0) + } +} + +impl CoerceUnsized> for InodeUse +where + T: Inode + Unsize + ?Sized, + U: Inode + ?Sized, +{ +} + +impl Clone for InodeUse +where + I: Inode + ?Sized, +{ + fn clone(&self) -> Self { + Self(self.0.clone()) + } +} + +impl core::fmt::Debug for InodeUse +where + I: Inode + ?Sized, +{ + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "InodeUse(ino={})", self.ino()) + } +} + +impl Deref for InodeUse +where + I: Inode + ?Sized, +{ + type Target = I; + + fn deref(&self) -> &Self::Target { + self.0.deref() + } +} diff --git a/src/kernel/vfs/inode/mod.rs b/src/kernel/vfs/inode/mod.rs new file mode 100644 index 00000000..786d30fa --- /dev/null +++ b/src/kernel/vfs/inode/mod.rs @@ -0,0 +1,10 @@ +mod ino; +mod inode; +mod ops; +mod statx; + +pub use ino::Ino; +pub use inode::{ + Inode, InodeDir, InodeDirOps, InodeFile, InodeFileOps, InodeInfo, InodeOps, InodeRef, InodeUse, +}; +pub use ops::{RenameData, WriteOffset}; diff --git a/src/kernel/vfs/inode/ops.rs b/src/kernel/vfs/inode/ops.rs new file mode 100644 index 00000000..baab1a80 --- /dev/null +++ b/src/kernel/vfs/inode/ops.rs @@ -0,0 +1,18 @@ +use alloc::sync::Arc; + +use crate::kernel::vfs::dentry::Dentry; + +use super::{inode::InodeUse, Inode}; + +pub enum WriteOffset<'end> { + Position(usize), + End(&'end mut usize), +} + +pub struct RenameData<'a, 'b> { + pub old_dentry: &'a Arc, + pub new_dentry: &'b Arc, + pub new_parent: InodeUse, + pub is_exchange: bool, + pub no_replace: bool, +} diff --git a/src/kernel/vfs/inode/statx.rs b/src/kernel/vfs/inode/statx.rs new file mode 100644 index 00000000..a85ef3af --- /dev/null +++ b/src/kernel/vfs/inode/statx.rs @@ -0,0 +1,97 @@ +use posix_types::stat::StatX; + +use crate::{ + kernel::{ + constants::{ + STATX_ATIME, STATX_BLOCKS, STATX_CTIME, STATX_GID, STATX_INO, STATX_MODE, STATX_MTIME, + STATX_NLINK, STATX_SIZE, STATX_TYPE, STATX_UID, + }, + vfs::types::Format, + }, + prelude::KResult, +}; + +use super::{inode::InodeUse, Inode}; + +impl InodeUse +where + I: Inode + ?Sized, +{ + pub fn statx(&self, stat: &mut StatX, mask: u32) -> KResult<()> { + let sb = self.sbget()?; + let info = self.info().lock(); + + if mask & STATX_NLINK != 0 { + stat.stx_nlink = info.nlink as _; + stat.stx_mask |= STATX_NLINK; + } + + if mask & STATX_ATIME != 0 { + stat.stx_atime = info.atime.into(); + stat.stx_mask |= STATX_ATIME; + } + + if mask & STATX_MTIME != 0 { + stat.stx_mtime = info.mtime.into(); + stat.stx_mask |= STATX_MTIME; + } + + if mask & STATX_CTIME != 0 { + stat.stx_ctime = info.ctime.into(); + stat.stx_mask |= STATX_CTIME; + } + + if mask & STATX_SIZE != 0 { + stat.stx_size = info.size as _; + stat.stx_mask |= STATX_SIZE; + } + + stat.stx_mode = 0; + if mask & STATX_MODE != 0 { + stat.stx_mode |= info.perm.bits() as u16; + stat.stx_mask |= STATX_MODE; + } + + if mask & STATX_TYPE != 0 { + let format = self.format(); + + stat.stx_mode |= format.as_raw() as u16; + if let Format::BLK | Format::CHR = format { + let devid = self.devid()?; + stat.stx_rdev_major = devid.major as _; + stat.stx_rdev_minor = devid.minor as _; + } + 
stat.stx_mask |= STATX_TYPE; + } + + if mask & STATX_INO != 0 { + stat.stx_ino = self.ino().as_raw(); + stat.stx_mask |= STATX_INO; + } + + if mask & STATX_BLOCKS != 0 { + stat.stx_blocks = (info.size + 512 - 1) / 512; + stat.stx_blksize = sb.info.io_blksize as _; + stat.stx_mask |= STATX_BLOCKS; + } + + if mask & STATX_UID != 0 { + stat.stx_uid = info.uid; + stat.stx_mask |= STATX_UID; + } + + if mask & STATX_GID != 0 { + stat.stx_gid = info.gid; + stat.stx_mask |= STATX_GID; + } + + let fsdev = sb.info.device_id; + stat.stx_dev_major = fsdev.major as _; + stat.stx_dev_minor = fsdev.minor as _; + + // TODO: support more attributes + stat.stx_attributes_mask = 0; + + Ok(()) + } +} diff --git a/src/kernel/vfs/mod.rs b/src/kernel/vfs/mod.rs index f62cb9b9..5b8eca5a 100644 --- a/src/kernel/vfs/mod.rs +++ b/src/kernel/vfs/mod.rs @@ -1,31 +1,31 @@ -use crate::prelude::*; -use alloc::sync::Arc; -use dentry::Dentry; -use eonix_sync::LazyLock; -use inode::Mode; - pub mod dentry; mod file; pub mod filearray; pub mod inode; pub mod mount; -pub mod vfs; +mod superblock; +pub mod types; -pub use file::{File, FileType, PollEvent, SeekOption, TerminalFile}; +use crate::prelude::*; +use alloc::sync::Arc; +use dentry::Dentry; +use eonix_sync::LazyLock; +use types::Permission; -pub type DevId = u32; +pub use file::{File, FileType, PollEvent, SeekOption, TerminalFile}; +pub use superblock::{SbRef, SbUse, SuperBlock, SuperBlockInfo, SuperBlockLock}; pub struct FsContext { pub fsroot: Arc, pub cwd: Spin>, - pub umask: Spin, + pub umask: Spin, } static GLOBAL_FS_CONTEXT: LazyLock> = LazyLock::new(|| { Arc::new(FsContext { fsroot: Dentry::root().clone(), cwd: Spin::new(Dentry::root().clone()), - umask: Spin::new(Mode::new(0o022)), + umask: Spin::new(Permission::new(0o755)), }) }); diff --git a/src/kernel/vfs/mount.rs b/src/kernel/vfs/mount.rs index 0b38e0c0..213acae9 100644 --- a/src/kernel/vfs/mount.rs +++ b/src/kernel/vfs/mount.rs @@ -1,11 +1,15 @@ use super::{ dentry::{dcache, Dentry, DROOT}, - inode::Inode, - vfs::Vfs, + inode::{Inode, InodeUse}, + SbUse, SuperBlock, +}; +use crate::kernel::{ + constants::{EEXIST, ENODEV, ENOTDIR}, + task::block_on, }; -use crate::kernel::constants::{EEXIST, ENODEV, ENOTDIR}; use crate::prelude::*; use alloc::{collections::btree_map::BTreeMap, string::ToString as _, sync::Arc}; +use async_trait::async_trait; use eonix_sync::LazyLock; pub const MS_RDONLY: u64 = 1 << 0; @@ -30,17 +34,21 @@ static MOUNT_CREATORS: Spin>> = Spin::new static MOUNTS: Spin, MountPointData)>> = Spin::new(vec![]); pub struct Mount { - _vfs: Arc, + sb: SbUse, root: Arc, } impl Mount { - pub fn new(mp: &Dentry, vfs: Arc, root_inode: Arc) -> KResult { + pub fn new( + mp: &Dentry, + sb: SbUse, + root_inode: InodeUse, + ) -> KResult { let root_dentry = Dentry::create(mp.parent().clone(), &mp.get_name()); - root_dentry.save_dir(root_inode)?; + root_dentry.fill(root_inode); Ok(Self { - _vfs: vfs, + sb, root: root_dentry, }) } @@ -53,9 +61,10 @@ impl Mount { unsafe impl Send for Mount {} unsafe impl Sync for Mount {} +#[async_trait] pub trait MountCreator: Send + Sync { fn check_signature(&self, first_block: &[u8]) -> KResult; - fn create_mount(&self, source: &str, flags: u64, mp: &Arc) -> KResult; + async fn create_mount(&self, source: &str, flags: u64, mp: &Arc) -> KResult; } pub fn register_filesystem(fstype: &str, creator: Arc) -> KResult<()> { @@ -77,7 +86,7 @@ struct MountPointData { flags: u64, } -pub fn do_mount( +pub async fn do_mount( mountpoint: &Arc, source: &str, mountpoint_str: &str, @@ -101,7 
+110,7 @@ pub fn do_mount( let creators = { MOUNT_CREATORS.lock() }; creators.get(fstype).ok_or(ENODEV)?.clone() }; - let mount = creator.create_mount(source, flags, mountpoint)?; + let mount = creator.create_mount(source, flags, mountpoint).await?; let root_dentry = mount.root().clone(); @@ -165,8 +174,7 @@ impl Dentry { .cloned() .expect("tmpfs not registered."); - let mount = creator - .create_mount(&source, mount_flags, &DROOT) + let mount = block_on(creator.create_mount(&source, mount_flags, &DROOT)) .expect("Failed to create root mount."); let root_dentry = mount.root().clone(); diff --git a/src/kernel/vfs/superblock.rs b/src/kernel/vfs/superblock.rs new file mode 100644 index 00000000..85b28c01 --- /dev/null +++ b/src/kernel/vfs/superblock.rs @@ -0,0 +1,127 @@ +use core::{ + marker::Unsize, + ops::{CoerceUnsized, Deref}, +}; + +use alloc::sync::{Arc, Weak}; +use eonix_sync::RwLock; + +use crate::{kernel::constants::EIO, prelude::KResult}; + +use super::types::DeviceId; + +pub trait SuperBlock: Send + Sync + 'static {} + +#[derive(Debug, Clone)] +pub struct SuperBlockInfo { + pub io_blksize: u32, + pub device_id: DeviceId, + pub read_only: bool, +} + +pub struct SuperBlockLock(()); + +pub struct SuperBlockComplex +where + Backend: SuperBlock + ?Sized, +{ + pub info: SuperBlockInfo, + pub rwsem: RwLock, + pub backend: Backend, +} + +pub struct SbRef(Weak>) +where + S: SuperBlock + ?Sized; + +pub struct SbUse(Arc>) +where + S: SuperBlock + ?Sized; + +impl SbRef +where + S: SuperBlock + ?Sized, +{ + pub fn try_get(&self) -> Option> { + self.0.upgrade().map(|arc| SbUse(arc)) + } + + pub fn get(&self) -> KResult> { + self.try_get().ok_or(EIO) + } + + pub fn from(sb: &SbUse) -> Self { + SbRef(Arc::downgrade(&sb.0)) + } + + pub fn eq(&self, other: &SbRef) -> bool + where + U: SuperBlock + ?Sized, + { + core::ptr::addr_eq(self.0.as_ptr(), other.0.as_ptr()) + } +} + +impl SbUse +where + S: SuperBlock, +{ + pub fn new(info: SuperBlockInfo, backend: S) -> Self { + Self(Arc::new(SuperBlockComplex { + info, + rwsem: RwLock::new(SuperBlockLock(())), + backend, + })) + } + + pub fn new_cyclic(info: SuperBlockInfo, backend_func: impl FnOnce(SbRef) -> S) -> Self { + Self(Arc::new_cyclic(|weak| SuperBlockComplex { + info, + rwsem: RwLock::new(SuperBlockLock(())), + backend: backend_func(SbRef(weak.clone())), + })) + } +} + +impl Clone for SbRef +where + S: SuperBlock + ?Sized, +{ + fn clone(&self) -> Self { + SbRef(self.0.clone()) + } +} + +impl Clone for SbUse +where + S: SuperBlock + ?Sized, +{ + fn clone(&self) -> Self { + SbUse(self.0.clone()) + } +} + +impl CoerceUnsized> for SbRef +where + T: SuperBlock + Unsize + ?Sized, + U: SuperBlock + ?Sized, +{ +} + +impl CoerceUnsized> for SbUse +where + T: SuperBlock + Unsize + ?Sized, + U: SuperBlock + ?Sized, +{ +} + +impl Deref for SbUse +where + S: SuperBlock + ?Sized, +{ + type Target = SuperBlockComplex; + + fn deref(&self) -> &Self::Target { + self.0.deref() + } +} diff --git a/src/kernel/vfs/types/device_id.rs b/src/kernel/vfs/types/device_id.rs new file mode 100644 index 00000000..cf3ea886 --- /dev/null +++ b/src/kernel/vfs/types/device_id.rs @@ -0,0 +1,36 @@ +use core::fmt::{Debug, Display, Formatter}; + +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct DeviceId { + pub major: u16, + pub minor: u16, +} + +impl DeviceId { + pub const fn new(major: u16, minor: u16) -> Self { + Self { major, minor } + } + + pub const fn from_raw(raw: u32) -> Self { + Self { + major: (raw >> 16) as u16, + minor: (raw & 0xFFFF) as u16, + } + 
} + + pub const fn to_raw(self) -> u32 { + ((self.major as u32) << 16) | (self.minor as u32) + } +} + +impl Debug for DeviceId { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + write!(f, "DeviceId({:04x}:{:04x})", self.major, self.minor) + } +} + +impl Display for DeviceId { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + write!(f, "{:04x}:{:04x}", self.major, self.minor) + } +} diff --git a/src/kernel/vfs/types/mod.rs b/src/kernel/vfs/types/mod.rs new file mode 100644 index 00000000..4a7505f7 --- /dev/null +++ b/src/kernel/vfs/types/mod.rs @@ -0,0 +1,5 @@ +mod device_id; +mod mode; + +pub use device_id::DeviceId; +pub use mode::{Format, Mode, Permission}; diff --git a/src/kernel/vfs/types/mode.rs b/src/kernel/vfs/types/mode.rs new file mode 100644 index 00000000..dc1b88ec --- /dev/null +++ b/src/kernel/vfs/types/mode.rs @@ -0,0 +1,169 @@ +use crate::kernel::{ + constants::{S_IFBLK, S_IFCHR, S_IFDIR, S_IFLNK, S_IFMT, S_IFREG}, + syscall::{FromSyscallArg, SyscallRetVal}, +}; + +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct Mode(u32); + +#[derive(Clone, Copy, PartialEq, Eq)] +pub enum Format { + REG, + DIR, + LNK, + BLK, + CHR, +} + +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct Permission(u32); + +impl Mode { + pub const fn new(bits: u32) -> Self { + Self(bits) + } + + pub const fn is_blk(&self) -> bool { + (self.0 & S_IFMT) == S_IFBLK + } + + pub const fn is_chr(&self) -> bool { + (self.0 & S_IFMT) == S_IFCHR + } + + pub const fn bits(&self) -> u32 { + self.0 + } + + pub const fn format_bits(&self) -> u32 { + self.0 & S_IFMT + } + + pub const fn non_format_bits(&self) -> u32 { + self.0 & !S_IFMT + } + + pub fn format(&self) -> Format { + match self.format_bits() { + S_IFREG => Format::REG, + S_IFDIR => Format::DIR, + S_IFLNK => Format::LNK, + S_IFBLK => Format::BLK, + S_IFCHR => Format::CHR, + _ => panic!("unknown format bits: {:#o}", self.format_bits()), + } + } + + pub fn perm(&self) -> Permission { + Permission::new(self.non_format_bits()) + } + + pub const fn non_format(&self) -> Self { + Self::new(self.non_format_bits()) + } + + pub const fn set_perm(&mut self, perm: Permission) { + self.0 = self.format_bits() | perm.bits(); + } +} + +impl Format { + pub const fn as_raw(&self) -> u32 { + match self { + Self::REG => S_IFREG, + Self::DIR => S_IFDIR, + Self::LNK => S_IFLNK, + Self::BLK => S_IFBLK, + Self::CHR => S_IFCHR, + } + } +} + +impl Permission { + const RWX: [&str; 8] = ["---", "--x", "-w-", "-wx", "r--", "r-x", "rw-", "rwx"]; + + pub const fn new(perm_bits: u32) -> Self { + Self(perm_bits & 0o7777) + } + + pub const fn bits(&self) -> u32 { + self.0 + } + + pub const fn mask_with(&self, mask: Self) -> Self { + Self(self.0 & mask.0) + } +} + +impl core::fmt::Debug for Mode { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self.non_format_bits() & !0o777 { + 0 => write!( + f, + "Mode({format:?}, {perm:#o})", + format = self.format(), + perm = self.non_format_bits() + )?, + rem => write!( + f, + "Mode({format:?}, {perm:#o}, rem={rem:#x})", + format = self.format(), + perm = self.non_format_bits() & 0o777 + )?, + } + + Ok(()) + } +} + +impl core::fmt::Debug for Format { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + Self::REG => write!(f, "REG"), + Self::DIR => write!(f, "DIR"), + Self::LNK => write!(f, "LNK"), + Self::BLK => write!(f, "BLK"), + Self::CHR => write!(f, "CHR"), + } + } +} + +impl core::fmt::Debug for Permission { + fn fmt(&self, f: &mut 
core::fmt::Formatter<'_>) -> core::fmt::Result { + let owner = self.0 >> 6 & 0o7; + let group = self.0 >> 3 & 0o7; + let other = self.0 & 0o7; + + write!( + f, + "{}{}{}", + Self::RWX[owner as usize], + Self::RWX[group as usize], + Self::RWX[other as usize] + ) + } +} + +impl FromSyscallArg for Mode { + fn from_arg(value: usize) -> Self { + Mode::new(value as u32) + } +} + +impl SyscallRetVal for Mode { + fn into_retval(self) -> Option { + Some(self.bits() as usize) + } +} + +impl FromSyscallArg for Permission { + fn from_arg(value: usize) -> Self { + Permission::new(value as u32) + } +} + +impl SyscallRetVal for Permission { + fn into_retval(self) -> Option { + Some(self.bits() as usize) + } +} diff --git a/src/kernel/vfs/vfs.rs b/src/kernel/vfs/vfs.rs deleted file mode 100644 index ee66f0b6..00000000 --- a/src/kernel/vfs/vfs.rs +++ /dev/null @@ -1,10 +0,0 @@ -use crate::prelude::*; - -use super::DevId; - -#[allow(dead_code)] -pub trait Vfs: Send + Sync + AsAny { - fn io_blksize(&self) -> usize; - fn fs_devid(&self) -> DevId; - fn is_read_only(&self) -> bool; -} diff --git a/src/lib.rs b/src/lib.rs index 80d24c28..98e196f8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,10 +2,12 @@ #![no_main] #![feature(allocator_api)] #![feature(c_size_t)] +#![feature(coerce_unsized)] #![feature(concat_idents)] #![feature(arbitrary_self_types)] #![feature(get_mut_unchecked)] #![feature(macro_metavar_expr)] +#![feature(unsize)] extern crate alloc; @@ -46,8 +48,8 @@ use kernel::{ task::{KernelStack, ProcessBuilder, ProcessList, ProgramLoader, ThreadBuilder}, vfs::{ dentry::Dentry, - inode::Mode, mount::{do_mount, MS_NOATIME, MS_NODEV, MS_NOSUID, MS_RDONLY}, + types::Permission, FsContext, }, CharDevice, @@ -192,16 +194,16 @@ async fn init_process(early_kstack: PRange) { { // We might want the serial initialized as soon as possible. 
driver::serial::init().unwrap(); - driver::e1000e::register_e1000e_driver(); - driver::ahci::register_ahci_driver(); + driver::e1000e::register_e1000e_driver().await; + driver::ahci::register_ahci_driver().await; } #[cfg(target_arch = "riscv64")] { driver::serial::init().unwrap(); driver::virtio::init_virtio_devices(); - driver::e1000e::register_e1000e_driver(); - driver::ahci::register_ahci_driver(); + driver::e1000e::register_e1000e_driver().await; + driver::ahci::register_ahci_driver().await; driver::goldfish_rtc::probe(); } @@ -209,21 +211,26 @@ async fn init_process(early_kstack: PRange) { { driver::serial::init().unwrap(); driver::virtio::init_virtio_devices(); - driver::e1000e::register_e1000e_driver(); - driver::ahci::register_ahci_driver(); + driver::e1000e::register_e1000e_driver().await; + driver::ahci::register_ahci_driver().await; } fs::tmpfs::init(); - fs::procfs::init(); + fs::procfs::init().await; fs::fat32::init(); - fs::ext4::init(); + // fs::ext4::init(); let load_info = { // mount fat32 /mnt directory let fs_context = FsContext::global(); - let mnt_dir = Dentry::open(fs_context, Path::new(b"/mnt/").unwrap(), true).unwrap(); + let mnt_dir = Dentry::open(fs_context, Path::new(b"/mnt/").unwrap(), true) + .await + .unwrap(); - mnt_dir.mkdir(Mode::new(0o755)).unwrap(); + mnt_dir + .mkdir(Permission::new(0o755)) + .await + .expect("Failed to create /mnt directory"); do_mount( &mnt_dir, @@ -232,6 +239,7 @@ async fn init_process(early_kstack: PRange) { "fat32", MS_RDONLY | MS_NOATIME | MS_NODEV | MS_NOSUID, ) + .await .unwrap(); let init_names = [&b"/init"[..], &b"/sbin/init"[..], &b"/mnt/initsh"[..]]; @@ -239,7 +247,7 @@ async fn init_process(early_kstack: PRange) { let mut init_name = None; let mut init = None; for name in init_names { - if let Ok(dentry) = Dentry::open(fs_context, Path::new(name).unwrap(), true) { + if let Ok(dentry) = Dentry::open(fs_context, Path::new(name).unwrap(), true).await { if dentry.is_valid() { init_name = Some(CString::new(name).unwrap()); init = Some(dentry); @@ -261,6 +269,7 @@ async fn init_process(early_kstack: PRange) { ]; ProgramLoader::parse(fs_context, init_name, init.clone(), argv, envp) + .await .expect("Failed to parse init program") .load() .await diff --git a/src/prelude.rs b/src/prelude.rs index b3dbe2ce..880489da 100644 --- a/src/prelude.rs +++ b/src/prelude.rs @@ -18,34 +18,6 @@ pub(crate) use crate::kernel::console::{ pub(crate) use alloc::{boxed::Box, string::String, vec, vec::Vec}; -pub(crate) use core::{any::Any, fmt::Write, marker::PhantomData, str}; +pub(crate) use core::{fmt::Write, marker::PhantomData, str}; pub use crate::sync::Spin; - -#[allow(dead_code)] -pub trait AsAny: Send + Sync { - fn as_any(&self) -> &dyn Any; - fn as_any_mut(&mut self) -> &mut dyn Any; -} - -macro_rules! impl_any { - ($t:ty) => { - impl AsAny for $t { - fn as_any(&self) -> &dyn Any { - self - } - - fn as_any_mut(&mut self) -> &mut dyn Any { - self - } - } - }; -} - -macro_rules! addr_of_mut_field { - ($pointer:expr, $field:ident) => { - core::ptr::addr_of_mut!((*$pointer).$field) - }; -} - -pub(crate) use {addr_of_mut_field, impl_any}; From ea2122331ee265263a23981e230c76199680d866 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sun, 14 Sep 2025 21:46:19 +0800 Subject: [PATCH 02/25] vfs, rcu: rework path walking with new rcu syntax The old path walking algorithm requires recursion, which is not supported in async rust. So we boxed them all as a temporary solution in previous commits. 
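For reference, the boxed walk being removed here had roughly the following
shape (simplified; the real function also took the `FsContext` and a
`follow` flag):

    fn open_recursive<'a>(
        cwd: &'a Arc<Dentry>,
        path: Path<'a>,
        nrecur: u32,
    ) -> Pin<Box<dyn Future<Output = KResult<Arc<Dentry>>> + 'a>> {
        Box::pin(async move {
            // Resolve each component, recursing through symlinks by
            // calling open_recursive() again, each call boxing anew.
            todo!()
        })
    }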
This would introduce massive overhead even for fast path walks just because we might sleep in `readlink()` and `lookup()` calls. The newly proposed method is to break the walk into several phases similar to those in Linux: an RCU walk and a REF walk. The RCU walk holds the RCU lock and never blocks, so the function itself can be non-async. If we hit non-present dentries, we fall back to the REF walk. In REF walks, we clone the Arcs and consult the VFS layer for an accurate answer. Note that in both methods mentioned above, symlinks are not handled and are returned directly with all path components left untouched. We have a dedicated async function that follows symlinks by recursively calling the walk function. This can be slow but is called infrequently, so we wrap the function with `Box::pin()` to break the recursion chain. After the symlink resolution is done, we return to the original position and continue the walk. We found that an inode is associated with a dentry exactly once and never replaced. So the `data` RCUPointer field is actually unnecessary and we can use the atomic dentry kind to sync readers with the writer. This way we can eliminate `DentryData` allocations and improve performance. We also introduced a new RCU read lock syntax. In the RCU walk mentioned above, we need to store dentry references protected by some RCU read lock. With the old syntax, we can't express the lifetime associated with the common RCU read lock. The new syntax provides a `rcu_read_lock()` function to acquire the RCU read lock. The returned lock has an associated lifetime, so we can use it throughout the RCU session. Signed-off-by: greatbridf --- Cargo.lock | 5 + Cargo.toml | 8 +- crates/arcref/Cargo.lock | 7 + crates/arcref/Cargo.toml | 11 + crates/arcref/src/arcref.rs | 216 +++++++++++++++++ crates/arcref/src/lib.rs | 8 + crates/posix_types/src/result.rs | 6 + src/kernel/vfs/dentry.rs | 394 +++++++++++++------------ src/kernel/vfs/dentry/dcache.rs | 76 +++++- src/kernel/vfs/dentry/walk.rs | 370 +++++++++++++++++++++++++++++ src/path.rs | 40 ++-- src/rcu.rs | 99 ++++---- 12 files changed, 942 insertions(+), 298 deletions(-) create mode 100644 crates/arcref/Cargo.lock create mode 100644 crates/arcref/Cargo.toml create mode 100644 crates/arcref/src/arcref.rs create mode 100644 crates/arcref/src/lib.rs create mode 100644 src/kernel/vfs/dentry/walk.rs diff --git a/Cargo.lock b/Cargo.lock index 32868677..f85a9d2f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -28,6 +28,10 @@ dependencies = [ "log", ] +[[package]] +name = "arcref" +version = "0.1.0" + [[package]] name = "async-trait" version = "0.1.89" @@ -155,6 +159,7 @@ dependencies = [ "acpi", "align_ext", "another_ext4", + "arcref", "async-trait", "atomic_unique_refcell", "bitflags", diff --git a/Cargo.toml b/Cargo.toml index bc7e7b0c..5158025e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,9 @@ edition = "2021" crate-type = ["bin"] [dependencies] +arcref = { path = "./crates/arcref", default-features = false, features = [ + "alloc", +] } atomic_unique_refcell = { path = "./crates/atomic_unique_refcell", features = [ "no_std", ] } @@ -35,7 +38,10 @@ stalloc = { version = "0.6.1", default-features = false, features = [ "allocator-api", ] } async-trait = "0.1.89" -futures = { version = "0.3.31", features = ["alloc", "async-await"], default-features = false } +futures = { version = "0.3.31", features = [ + "alloc", + "async-await", +], default-features = false } [target.'cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))'.dependencies] virtio-drivers =
{ version = "0.11.0" } diff --git a/crates/arcref/Cargo.lock b/crates/arcref/Cargo.lock new file mode 100644 index 00000000..3c4e1567 --- /dev/null +++ b/crates/arcref/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "arcref" +version = "0.1.0" diff --git a/crates/arcref/Cargo.toml b/crates/arcref/Cargo.toml new file mode 100644 index 00000000..a0af89f8 --- /dev/null +++ b/crates/arcref/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "arcref" +version = "0.1.0" +edition = "2024" + +[dependencies] + +[features] +alloc = [] +std = ["alloc"] +default = ["std"] diff --git a/crates/arcref/src/arcref.rs b/crates/arcref/src/arcref.rs new file mode 100644 index 00000000..3d01852d --- /dev/null +++ b/crates/arcref/src/arcref.rs @@ -0,0 +1,216 @@ +#[cfg(not(feature = "std"))] +use core::{ + borrow::Borrow, + marker::{PhantomData, Unsize}, + mem::ManuallyDrop, + ops::{Deref, DispatchFromDyn}, +}; + +#[cfg(all(not(feature = "std"), feature = "alloc"))] +extern crate alloc; + +#[cfg(all(not(feature = "std"), feature = "alloc"))] +use alloc::sync::Arc; + +#[cfg(feature = "std")] +use std::{ + borrow::Borrow, + marker::{PhantomData, Unsize}, + mem::ManuallyDrop, + ops::{Deref, DispatchFromDyn}, + sync::Arc, +}; + +pub trait AsArcRef +where + T: ?Sized, +{ + /// Borrow the [`Arc`] and convert the reference into [`ArcRef`]. + fn aref(&self) -> ArcRef<'_, T>; +} + +pub struct ArcRef<'a, T: ?Sized> { + ptr: *const T, + _phantom: PhantomData<&'a ()>, +} + +unsafe impl Send for ArcRef<'_, T> {} +unsafe impl Sync for ArcRef<'_, T> {} + +#[cfg(any(feature = "std", feature = "alloc"))] +impl<'a, T: ?Sized> ArcRef<'a, T> { + pub fn new(arc: &'a Arc) -> Self { + Self { + ptr: Arc::as_ptr(arc), + _phantom: PhantomData, + } + } + + /// Create a new `ArcRef` from a raw pointer. + /// + /// # Safety + /// The given pointer MUST be created by `Arc::as_ptr` or `Arc::into_raw`. + /// The caller is responsible to ensure that the pointer is valid for the + /// lifetime of the `ArcRef`. + pub unsafe fn new_unchecked(arc_ptr: *const T) -> Self { + Self { + ptr: arc_ptr, + _phantom: PhantomData, + } + } + + pub fn with_arc(self, func: Func) -> Out + where + Func: FnOnce(&Arc) -> Out, + { + func(&ManuallyDrop::new(unsafe { Arc::from_raw(self.ptr) })) + } + + pub fn clone_arc(self) -> Arc { + self.with_arc(|arc| arc.clone()) + } + + pub fn ptr_eq_arc(self, other: &Arc) -> bool { + self.with_arc(|arc| Arc::ptr_eq(arc, other)) + } +} + +#[cfg(all(not(feature = "std"), feature = "alloc"))] +impl AsArcRef for Arc +where + T: ?Sized, +{ + fn aref(&self) -> ArcRef<'_, T> { + ArcRef::new(self) + } +} + +impl AsRef for ArcRef<'_, T> +where + T: ?Sized, +{ + fn as_ref(&self) -> &T { + self.deref() + } +} + +impl Borrow for ArcRef<'_, T> +where + T: ?Sized, +{ + fn borrow(&self) -> &T { + self.deref() + } +} + +impl<'a, T> Clone for ArcRef<'a, T> +where + T: ?Sized, +{ + fn clone(&self) -> Self { + Self { + ptr: self.ptr, + _phantom: PhantomData, + } + } +} + +impl Copy for ArcRef<'_, T> where T: ?Sized {} + +impl Deref for ArcRef<'_, T> { + type Target = T; + + fn deref(&self) -> &T { + unsafe { + // SAFETY: `self.ptr` points to a valid `T` instance because it was + // created from a valid `Arc`. 
+ self.ptr.as_ref().unwrap_unchecked() + } + } +} + +impl<'a, T, U> DispatchFromDyn> for ArcRef<'a, T> +where + T: ?Sized + Unsize, + U: ?Sized, +{ +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn create_from_arc() { + let data = Arc::new(42); + let _arc_ref = ArcRef::new(&data); + } + + #[test] + fn deref() { + let data = Arc::new(42); + let arc_ref = ArcRef::new(&data); + + assert_eq!(*arc_ref, 42); + } + + #[test] + fn clone_into_arc() { + let data = Arc::new(42); + let arc_ref = ArcRef::new(&data); + + let cloned = arc_ref.clone_arc(); + + assert_eq!(Arc::strong_count(&data), 2); + assert_eq!(*cloned, 42); + } + + #[test] + fn dyn_compatible_receiver() { + struct Data(u32); + + trait Trait { + fn foo(self: ArcRef) -> u32; + } + + impl Trait for Data { + fn foo(self: ArcRef) -> u32 { + self.0 + } + } + + let data = Arc::new(Data(42)); + let arc_ref = ArcRef::new(&data); + + assert_eq!(arc_ref.foo(), 42); + } + + #[test] + fn clone_from_train_methods() { + struct Data(u32); + + trait Trait { + fn foo(&self) -> u32; + + fn clone_self(self: ArcRef) -> Arc; + } + + impl Trait for Data { + fn foo(&self) -> u32 { + self.0 + } + + fn clone_self(self: ArcRef) -> Arc { + self.clone_arc() as _ + } + } + + let data = Arc::new(Data(42)); + let arc_ref = ArcRef::new(&data); + + let cloned = arc_ref.clone_self(); + + assert_eq!(arc_ref.foo(), 42); + assert_eq!(cloned.foo(), 42); + } +} diff --git a/crates/arcref/src/lib.rs b/crates/arcref/src/lib.rs new file mode 100644 index 00000000..83a61985 --- /dev/null +++ b/crates/arcref/src/lib.rs @@ -0,0 +1,8 @@ +#![cfg_attr(not(feature = "std"), no_std)] +#![feature(arbitrary_self_types)] +#![feature(dispatch_from_dyn)] +#![feature(unsize)] + +mod arcref; + +pub use arcref::{ArcRef, AsArcRef}; diff --git a/crates/posix_types/src/result.rs b/crates/posix_types/src/result.rs index a10ff0ad..1535c444 100644 --- a/crates/posix_types/src/result.rs +++ b/crates/posix_types/src/result.rs @@ -1,14 +1,18 @@ pub enum PosixError { + ENOENT = 2, EFAULT = 14, EXDEV = 18, + ENOTDIR = 20, EINVAL = 22, } impl From for u32 { fn from(error: PosixError) -> Self { match error { + PosixError::ENOENT => 2, PosixError::EFAULT => 14, PosixError::EXDEV => 18, + PosixError::ENOTDIR => 20, PosixError::EINVAL => 22, } } @@ -17,8 +21,10 @@ impl From for u32 { impl core::fmt::Debug for PosixError { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { match self { + Self::ENOENT => write!(f, "ENOENT"), Self::EFAULT => write!(f, "EFAULT"), Self::EXDEV => write!(f, "EXDEV"), + Self::ENOTDIR => write!(f, "ENOTDIR"), Self::EINVAL => write!(f, "EINVAL"), } } diff --git a/src/kernel/vfs/dentry.rs b/src/kernel/vfs/dentry.rs index 5ac4e407..c1eb8cb8 100644 --- a/src/kernel/vfs/dentry.rs +++ b/src/kernel/vfs/dentry.rs @@ -1,45 +1,62 @@ pub mod dcache; +mod walk; -use super::{ - inode::{Ino, Inode, InodeUse, RenameData, WriteOffset}, - types::{DeviceId, Format, Mode, Permission}, - FsContext, -}; -use crate::{ - hash::KernelHasher, - io::{Buffer, ByteBuffer}, - kernel::{block::BlockDevice, CharDevice}, - path::{Path, PathComponent}, - prelude::*, - rcu::{RCUNode, RCUPointer, RCUReadGuard}, -}; -use crate::{ - io::Stream, - kernel::constants::{EEXIST, EINVAL, EISDIR, ELOOP, ENOENT, ENOTDIR, EPERM, ERANGE}, -}; -use alloc::sync::Arc; use core::{ + cell::UnsafeCell, fmt, - future::Future, hash::{BuildHasher, BuildHasherDefault, Hasher}, - pin::Pin, - sync::atomic::{AtomicPtr, AtomicU64, Ordering}, + sync::atomic::{AtomicPtr, AtomicU64, AtomicU8, Ordering}, }; + +use 
alloc::sync::Arc; +use arcref::AsArcRef; use eonix_sync::LazyLock; use pointers::BorrowedArc; use posix_types::{namei::RenameFlags, open::OpenFlags, result::PosixError, stat::StatX}; -#[derive(PartialEq, Eq)] +use crate::{ + hash::KernelHasher, + io::Buffer, + io::Stream, + kernel::constants::{EEXIST, EINVAL, EISDIR, ELOOP, ENOENT, EPERM, ERANGE}, + kernel::{block::BlockDevice, CharDevice}, + path::Path, + prelude::*, + rcu::{rcu_read_lock, RCUNode, RCUPointer, RCUReadGuard}, +}; + +use super::{ + inode::{Ino, Inode, InodeUse, RenameData, WriteOffset}, + types::{DeviceId, Format, Mode, Permission}, + FsContext, +}; + +const D_INVALID: u8 = 0; +const D_REGULAR: u8 = 1; +const D_DIRECTORY: u8 = 2; +const D_SYMLINK: u8 = 3; + +#[derive(Debug, PartialEq, Eq)] enum DentryKind { - Regular, - Directory, - Symlink, - Mountpoint, + Regular = D_REGULAR as isize, + Directory = D_DIRECTORY as isize, + Symlink = D_SYMLINK as isize, } -struct DentryData { - inode: InodeUse, - kind: DentryKind, +/// The [`Inode`] associated with a [`Dentry`]. +/// +/// An inode is assigned to a negative dentry exactly once: either when the +/// dentry is invalid and we create a file or directory on it, or when the +/// dentry is brought into the dcache by [lookup()]. +/// +/// This guarantees that as long as we acquire a non-invalid kind from +/// [`Self::kind`], we are synced with the writer and can safely read the +/// [`Self::inode`] field without reading torn data. +/// +/// [lookup()]: crate::kernel::vfs::inode::InodeDirOps::lookup +struct AssociatedInode { + kind: UnsafeCell>, + inode: UnsafeCell>>, } /// # Safety @@ -58,8 +75,7 @@ pub struct Dentry { prev: AtomicPtr, next: AtomicPtr, - // RCU Mutable - data: RCUPointer, + inode: AssociatedInode, } pub(super) static DROOT: LazyLock> = LazyLock::new(|| { @@ -69,7 +85,7 @@ hash: AtomicU64::new(0), prev: AtomicPtr::default(), next: AtomicPtr::default(), - data: RCUPointer::empty(), + inode: AssociatedInode::new(), }); unsafe { @@ -119,50 +135,19 @@ impl Dentry { self.hash.store(hash, Ordering::Relaxed); } - - async fn find(self: &Arc, name: &[u8]) -> KResult> { - let data = self.data.load(); - let data = data.as_ref().ok_or(ENOENT)?; - - if data.kind != DentryKind::Directory { - return Err(ENOTDIR); - } - - match name { - b"." => Ok(self.clone()), - b".." => Ok(self.parent().clone()), - _ => { - let dentry = Dentry::create(self.clone(), name); - - if let Some(found) = dcache::d_find_fast(&dentry) { - unsafe { - // SAFETY: This is safe because the dentry is never shared with - // others so we can drop them safely. - let _ = dentry.name.swap(None); - let _ = dentry.parent.swap(None); - } - - return Ok(found); - } - - let _ = dcache::d_try_revalidate(&dentry).await; - dcache::d_add(dentry.clone()); - - Ok(dentry) - } - } - } } impl Dentry { pub fn create(parent: Arc, name: &[u8]) -> Arc { + // TODO!!!: don't acquire our parent's refcount here...
+ let val = Arc::new(Self { parent: RCUPointer::new(parent), name: RCUPointer::new(Arc::new(Arc::from(name))), hash: AtomicU64::new(0), prev: AtomicPtr::default(), next: AtomicPtr::default(), - data: RCUPointer::empty(), + inode: AssociatedInode::new(), }); val.rehash(); @@ -196,27 +181,12 @@ impl Dentry { .map_or(core::ptr::null(), |parent| Arc::as_ptr(&parent)) } - fn save(&self, inode: InodeUse, kind: DentryKind) { - let new = DentryData { inode, kind }; - - // TODO!!!: We don't actually need to use `RCUPointer` here - // Safety: this function may only be called from `create`-like functions which requires the - // superblock's write locks to be held, so only one creation can happen at a time and we - // can't get a reference to the old data. - let old = unsafe { self.data.swap(Some(Arc::new(new))) }; - assert!(old.is_none()); - } - pub fn fill(&self, file: InodeUse) { - match file.format() { - Format::REG | Format::BLK | Format::CHR => self.save(file, DentryKind::Regular), - Format::DIR => self.save(file, DentryKind::Directory), - Format::LNK => self.save(file, DentryKind::Symlink), - } + self.inode.store(file); } pub fn inode(&self) -> Option> { - self.data.load().as_ref().map(|data| data.inode.clone()) + self.inode.load().map(|(_, inode)| inode.clone()) } pub fn get_inode(&self) -> KResult> { @@ -224,181 +194,85 @@ impl Dentry { } pub fn is_directory(&self) -> bool { - let data = self.data.load(); - data.as_ref() - .map_or(false, |data| data.kind == DentryKind::Directory) + self.inode + .load() + .map_or(false, |(kind, _)| kind == DentryKind::Directory) } pub fn is_valid(&self) -> bool { - self.data.load().is_some() + self.inode.load().is_some() } pub async fn open_check(self: &Arc, flags: OpenFlags, perm: Permission) -> KResult<()> { - let data = self.data.load(); - - if data.is_some() { - if flags.contains(OpenFlags::O_CREAT | OpenFlags::O_EXCL) { - Err(EEXIST) - } else { - Ok(()) - } - } else { - if !flags.contains(OpenFlags::O_CREAT) { - return Err(ENOENT); - } - - let parent = self.parent().get_inode()?; - parent.create(self, perm).await - } - } -} - -impl Dentry { - fn resolve_directory( - context: &FsContext, - dentry: Arc, - nrecur: u32, - ) -> Pin>> + use<'_>>> { - Box::pin(async move { - if nrecur >= 16 { - return Err(ELOOP); - } - - let data = dentry.data.load(); - let data = data.as_ref().ok_or(ENOENT)?; - - match data.kind { - DentryKind::Regular => Err(ENOTDIR), - DentryKind::Directory => Ok(dentry), - DentryKind::Symlink => { - let mut buffer = [0u8; 256]; - let mut buffer = ByteBuffer::new(&mut buffer); - - data.inode.readlink(&mut buffer).await?; - let path = Path::new(buffer.data())?; - - let dentry = - Self::open_recursive(context, &dentry.parent(), path, true, nrecur + 1) - .await?; - - Self::resolve_directory(context, dentry, nrecur + 1).await + match self.inode.load() { + Some(_) => { + if flags.contains(OpenFlags::O_CREAT | OpenFlags::O_EXCL) { + Err(EEXIST) + } else { + Ok(()) } - _ => panic!("Invalid dentry flags"), - } - }) - } - - pub fn open_recursive<'r, 'a: 'r, 'b: 'r, 'c: 'r>( - context: &'a FsContext, - cwd: &'b Arc, - path: Path<'c>, - follow: bool, - nrecur: u32, - ) -> Pin>> + 'r>> { - Box::pin(async move { - // too many recursive search layers will cause stack overflow - // so we use 16 for now - if nrecur >= 16 { - return Err(ELOOP); } - - let mut cwd = if path.is_absolute() { - context.fsroot.clone() - } else { - cwd.clone() - }; - - for item in path.iter() { - if let PathComponent::TrailingEmpty = item { - if cwd.data.load().as_ref().is_none() { 
- return Ok(cwd); - } + None => { + if !flags.contains(OpenFlags::O_CREAT) { + return Err(ENOENT); } - cwd = Self::resolve_directory(context, cwd, nrecur).await?; - - match item { - PathComponent::TrailingEmpty | PathComponent::Current => {} // pass - PathComponent::Parent => { - if !cwd.hash_eq(&context.fsroot) { - let parent = cwd.parent().clone(); - cwd = Self::resolve_directory(context, parent, nrecur).await?; - } - continue; - } - PathComponent::Name(name) => { - cwd = cwd.find(name).await?; - } - } + let parent = self.parent().get_inode()?; + parent.create(self, perm).await } - - if follow { - let data = cwd.data.load(); - - if let Some(data) = data.as_ref() { - if data.kind == DentryKind::Symlink { - let data = cwd.data.load(); - let data = data.as_ref().unwrap(); - let mut buffer = [0u8; 256]; - let mut buffer = ByteBuffer::new(&mut buffer); - - data.inode.readlink(&mut buffer).await?; - let path = Path::new(buffer.data())?; - - let parent = cwd.parent().clone(); - cwd = - Self::open_recursive(context, &parent, path, true, nrecur + 1).await?; - } - } - } - - Ok(cwd) - }) + } } +} +impl Dentry { pub async fn open( context: &FsContext, - path: Path<'_>, + path: &Path, follow_symlinks: bool, ) -> KResult> { let cwd = context.cwd.lock().clone(); - Dentry::open_recursive(context, &cwd, path, follow_symlinks, 0).await + Self::open_at(context, &cwd, path, follow_symlinks).await } pub async fn open_at( context: &FsContext, at: &Arc, - path: Path<'_>, + path: &Path, follow_symlinks: bool, ) -> KResult> { - Dentry::open_recursive(context, at, path, follow_symlinks, 0).await - } + let mut found = context.start_recursive_walk(at, path).await?; - pub fn get_path( - self: &Arc, - context: &FsContext, - buffer: &mut dyn Buffer, - ) -> KResult<()> { - let locked_parent = self.parent(); + if !follow_symlinks { + return Ok(found); + } - let path = { - let mut path = vec![]; + loop { + match found.inode.load() { + Some((DentryKind::Symlink, inode)) => { + found = context.follow_symlink(found.aref(), inode, 0).await?; + } + _ => return Ok(found), + } + } + } - let mut parent = locked_parent.borrow(); - let mut dentry = BorrowedArc::new(self); + pub fn get_path(self: &Arc, context: &FsContext, buffer: &mut dyn Buffer) -> KResult<()> { + let rcu_read = rcu_read_lock(); - while Arc::as_ptr(&dentry) != Arc::as_ptr(&context.fsroot) { - if path.len() > 32 { - return Err(ELOOP); - } + let mut path = vec![]; + + let mut current = self.aref(); + let mut parent = self.parent.dereference(&rcu_read).unwrap(); - path.push(dentry.name().clone()); - dentry = parent; - parent = dentry.parent.load_protected(&locked_parent).unwrap(); + while !current.ptr_eq_arc(&context.fsroot) { + if path.len() > 32 { + return Err(ELOOP); } - path - }; + path.push(current.name.dereference(&rcu_read).unwrap()); + current = parent; + parent = current.parent.dereference(&rcu_read).unwrap(); + } buffer.fill(b"/")?.ok_or(ERANGE)?; for item in path.iter().rev().map(|name| name.as_ref()) { @@ -533,3 +407,71 @@ impl Dentry { old_parent.rename(rename_data).await } } + +impl DentryKind { + fn into_raw(self) -> u8 { + unsafe { core::mem::transmute(self) } + } + + fn from_raw(raw: u8) -> Option { + unsafe { core::mem::transmute(raw) } + } + + fn as_atomic(me: &UnsafeCell>) -> &AtomicU8 { + unsafe { AtomicU8::from_ptr(me.get().cast()) } + } + + fn atomic_acq(me: &UnsafeCell>) -> Option { + Self::from_raw(Self::as_atomic(me).load(Ordering::Acquire)) + } + + fn atomic_swap_acqrel(me: &UnsafeCell>, kind: Option) -> Option { + 
Self::from_raw(Self::as_atomic(me).swap(kind.map_or(0, Self::into_raw), Ordering::AcqRel)) + } +} + +impl AssociatedInode { + fn new() -> Self { + Self { + inode: UnsafeCell::new(None), + kind: UnsafeCell::new(None), + } + } + + fn store(&self, inode: InodeUse) { + let kind = match inode.format() { + Format::REG | Format::BLK | Format::CHR => DentryKind::Regular, + Format::DIR => DentryKind::Directory, + Format::LNK => DentryKind::Symlink, + }; + + unsafe { + // SAFETY: We should be the first and only one to store the inode as + // is checked below. All other readers reading non-invalid + // kind will see the fully written inode. + self.inode.get().write(Some(inode)); + } + + assert_eq!( + DentryKind::atomic_swap_acqrel(&self.kind, Some(kind)), + None, + "Dentry can only be stored once." + ); + } + + fn kind(&self) -> Option { + DentryKind::atomic_acq(&self.kind) + } + + fn load(&self) -> Option<(DentryKind, &InodeUse)> { + self.kind().map(|kind| unsafe { + let inode = (&*self.inode.get()) + .as_ref() + .expect("Dentry with non-invalid kind has no inode"); + (kind, inode) + }) + } +} + +unsafe impl Send for AssociatedInode {} +unsafe impl Sync for AssociatedInode {} diff --git a/src/kernel/vfs/dentry/dcache.rs b/src/kernel/vfs/dentry/dcache.rs index e2491235..ee7503dc 100644 --- a/src/kernel/vfs/dentry/dcache.rs +++ b/src/kernel/vfs/dentry/dcache.rs @@ -1,11 +1,13 @@ use super::Dentry; use crate::kernel::constants::ENOENT; -use crate::rcu::RCUPointer; +use crate::rcu::{RCUPointer, RCUReadLock}; use crate::{ prelude::*, rcu::{RCUIterator, RCUList}, }; use alloc::sync::Arc; +use arcref::ArcRef; +use core::ops::Deref; use core::sync::atomic::Ordering; use eonix_sync::Mutex; @@ -16,26 +18,47 @@ static DCACHE: [RCUList; 1 << DCACHE_HASH_BITS] = static D_EXCHANGE_LOCK: Mutex<()> = Mutex::new(()); -pub fn d_hinted(dentry: &Dentry) -> &'static RCUList { - let hash = dentry.hash.load(Ordering::Relaxed) as usize & ((1 << DCACHE_HASH_BITS) - 1); +pub trait DCacheItem { + fn d_hash(&self) -> usize; + fn d_parent(&self) -> *const Dentry; + fn d_name<'r, 'a: 'r, 'b: 'a>( + &'a self, + rcu_read: &'b RCUReadLock, + ) -> impl Deref + 'r; +} + +fn d_eq(lhs: &impl DCacheItem, rhs: &impl DCacheItem, rcu_read: &RCUReadLock) -> bool { + lhs.d_hash() == rhs.d_hash() + && lhs.d_parent() == rhs.d_parent() + && *lhs.d_name(rcu_read) == *rhs.d_name(rcu_read) +} + +fn d_hinted(item: &impl DCacheItem) -> &'static RCUList { + let hash = item.d_hash() & ((1 << DCACHE_HASH_BITS) - 1); &DCACHE[hash] } -pub fn d_iter_for(dentry: &Dentry) -> RCUIterator<'static, Dentry> { - d_hinted(dentry).iter() +fn d_iter_for<'rcu>( + item: &impl DCacheItem, + rcu_read: &'rcu RCUReadLock, +) -> RCUIterator<'static, 'rcu, Dentry> { + d_hinted(item).iter(rcu_read) +} + +pub fn d_find_rcu<'rcu>( + item: &impl DCacheItem, + rcu_read: &'rcu RCUReadLock, +) -> Option> { + d_iter_for(item, rcu_read).find(|cur_ref| cur_ref.with_arc(|cur| d_eq(cur, item, rcu_read))) } /// Add the dentry to the dcache pub fn d_add(dentry: Arc) { + // TODO: Add `children` field to parent and lock parent dentry to avoid + // concurrent insertion causing duplication. 
d_hinted(&dentry).insert(dentry); } -pub fn d_find_fast(dentry: &Dentry) -> Option> { - d_iter_for(dentry) - .find(|cur| cur.hash_eq(dentry)) - .map(|dentry| dentry.clone()) -} - /// Call `lookup()` on the parent inode to try find if the dentry points to a valid inode /// /// Silently fail without any side effects @@ -80,3 +103,34 @@ pub async fn d_exchange(old: &Arc, new: &Arc) { d_add(old.clone()); d_add(new.clone()); } + +impl DCacheItem for Arc { + fn d_hash(&self) -> usize { + self.hash.load(Ordering::Relaxed) as usize + } + + fn d_parent(&self) -> *const Dentry { + self.parent_addr() + } + + fn d_name<'r, 'a: 'r, 'b: 'a>( + &'a self, + rcu_read: &'b RCUReadLock, + ) -> impl Deref + 'r { + struct Name<'a>(ArcRef<'a, Arc<[u8]>>); + + impl Deref for Name<'_> { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + &self.0 + } + } + + Name( + self.name + .dereference(rcu_read) + .expect("Dentry should have a non-null name"), + ) + } +} diff --git a/src/kernel/vfs/dentry/walk.rs b/src/kernel/vfs/dentry/walk.rs new file mode 100644 index 00000000..3e401b4b --- /dev/null +++ b/src/kernel/vfs/dentry/walk.rs @@ -0,0 +1,370 @@ +use core::{ + future::Future, + hash::{BuildHasher, BuildHasherDefault, Hasher}, + ops::Deref, + pin::Pin, +}; + +use alloc::{boxed::Box, sync::Arc}; +use arcref::{ArcRef, AsArcRef}; +use posix_types::result::PosixError; + +use crate::{ + hash::KernelHasher, + io::ByteBuffer, + kernel::{ + constants::ELOOP, + vfs::{ + inode::{Inode, InodeUse}, + FsContext, + }, + }, + path::{Path, PathComponent, PathIterator}, + prelude::KResult, + rcu::{rcu_read_lock, RCUReadLock}, +}; + +use super::{ + dcache::{self, DCacheItem}, + Dentry, DentryKind, +}; + +struct DentryFind<'a, 'b> { + parent: &'a Dentry, + name: &'b [u8], + hash: usize, +} + +pub enum WalkResultRcu<'rcu, 'path> { + Err(PosixError), + Ok(ArcRef<'rcu, Dentry>), + Symlink { + symlink: ArcRef<'rcu, Dentry>, + inode: InodeUse, + }, + Miss { + parent: ArcRef<'rcu, Dentry>, + name: &'path [u8], + }, +} + +pub enum WalkResult { + Err(PosixError), + Ok(Arc), + Symlink { + symlink: Arc, + inode: InodeUse, + }, +} + +impl Dentry { + /// Quick path of the dentry find operation. + /// + /// Check invalid and non-directory dentries, return immediately on dot and + /// dotdot component, and do a quick rcu dcache lookup. + /// + /// Note that while `Some(dentry)` guarantees present and valid dentry, + /// returning `None` is acceptable if the actual file exists but is not in + /// the dentry cache. If so, we should check again with `lookup`. + fn find_rcu<'r, 's: 'r>( + self: ArcRef<'s, Self>, + name: &[u8], + rcu_read: &'r RCUReadLock, + ) -> Result>, PosixError> { + match self.inode.load() { + Some((DentryKind::Directory, _)) => {} + Some(_) => return Err(PosixError::ENOTDIR), + None => return Err(PosixError::ENOENT), + } + + match name { + b"." => Ok(Some(self)), + b".." => Ok(Some( + self.parent + .dereference(rcu_read) + .expect("The field `parent` should be non-null"), + )), + _ => { + let dentry_find = DentryFind::new(&self, name); + Ok(dcache::d_find_rcu(&dentry_find, rcu_read)) + } + } + } + + async fn find_slow(self: &Arc, name: &[u8]) -> Result, PosixError> { + let dentry = Dentry::create(self.clone(), name); + + let _ = dcache::d_try_revalidate(&dentry).await; + dcache::d_add(dentry.clone()); + + Ok(dentry) + } + + pub async fn find_full(self: &Arc, name: &[u8]) -> Result, PosixError> { + if let Some(dentry) = self.aref().find_rcu(name, &rcu_read_lock())? 
{ + return Ok(dentry.clone_arc()); + } + + self.find_slow(name).await + } +} + +impl FsContext { + /// Walk the pathname and try to find the corresponding dentry FAST without + /// consulting the VFS for invalid dentries encountered. + fn walk_rcu<'rcu, 'path>( + &self, + mut current: ArcRef<'rcu, Dentry>, + iter: &mut PathIterator<'path>, + rcu_read: &'rcu RCUReadLock, + ) -> WalkResultRcu<'rcu, 'path> { + use PathComponent::*; + + loop { + let inode = current.inode.load(); + + if iter.is_empty() { + break; + } + + // Skip symlink resolution in rcu walk without consuming the iter. + if let Some((DentryKind::Symlink, inode)) = inode { + return WalkResultRcu::Symlink { + symlink: current, + inode: inode.clone(), + }; + } + + let Some(component) = iter.next() else { + break; + }; + + match (inode, component) { + // Skip trailing empty and dot for normal directories. + (Some((DentryKind::Directory, _)), TrailingEmpty | Current) => {} + // Walk to parent directory unless we are at the filesystem root. + (Some((DentryKind::Directory, _)), Parent) => { + if current.ptr_eq_arc(&self.fsroot) { + continue; + } + + current = current + .parent + .dereference(&rcu_read) + .expect("parent should exist"); + } + // Normal directory traversal + (Some((DentryKind::Directory, _)), Name(name)) => { + match current.find_rcu(name, &rcu_read) { + Err(err) => return WalkResultRcu::Err(err), + Ok(Some(found)) => { + current = found; + } + Ok(None) => { + return WalkResultRcu::Miss { + name, + parent: current, + }; + } + } + } + // Not a directory, fail and exit. + (Some(_), _) => return WalkResultRcu::Err(PosixError::ENOTDIR), + // Return invalid trailing entries directly. + (None, TrailingEmpty) => return WalkResultRcu::Ok(current), + // Invalid intermediate entries are not acceptable. + (None, _) => return WalkResultRcu::Err(PosixError::ENOENT), + } + } + + WalkResultRcu::Ok(current) + } + + /// Walk the pathname slowly with refcounts held and VFS lookups. + async fn walk_slow(&self, mut current: Arc, iter: &mut PathIterator<'_>) -> WalkResult { + use PathComponent::*; + + loop { + // `current` should be the parent directory and `component` is the + // next path component we are stepping into. + + if iter.is_empty() { + break; + } + + if let Some((DentryKind::Symlink, inode)) = current.inode.load() { + return WalkResult::Symlink { + inode: inode.clone(), + symlink: current, + }; + } + + let Some(component) = iter.next() else { + break; + }; + + match (current.inode.load(), &component) { + // Normal directory traversal + (Some((DentryKind::Directory, _)), _) => {} + // Not a directory, fail and exit. + (Some(_), _) => return WalkResult::Err(PosixError::ENOTDIR), + // Return invalid trailing entries directly. + (None, TrailingEmpty) => return WalkResult::Ok(current), + // Invalid intermediate entries are not acceptable. + (None, _) => return WalkResult::Err(PosixError::ENOENT), + } + + match component { + PathComponent::TrailingEmpty => {} + PathComponent::Current => {} + PathComponent::Parent => { + if current.hash_eq(&self.fsroot) { + continue; + } + + let parent = current.parent().clone(); + current = parent; + } + PathComponent::Name(name) => { + match current.find_full(name).await { + Ok(found) => current = found, + Err(err) => return WalkResult::Err(err), + }; + } + } + } + + WalkResult::Ok(current) + } + + /// Walk the pathname and get an accurate answer. Stop at symlinks. 
+ async fn walk_full( + &self, + current: ArcRef<'_, Dentry>, + iter: &mut PathIterator<'_>, + ) -> WalkResult { + let (parent_slow, name_slow); + + match self.walk_rcu(current, iter, &rcu_read_lock()) { + WalkResultRcu::Err(error) => return WalkResult::Err(error.into()), + WalkResultRcu::Ok(dentry) => return WalkResult::Ok(dentry.clone_arc()), + WalkResultRcu::Symlink { symlink, inode } => { + return WalkResult::Symlink { + symlink: symlink.clone_arc(), + inode, + }; + } + WalkResultRcu::Miss { parent, name } => { + // Fallback to regular refcounted lookup + parent_slow = parent.clone_arc(); + name_slow = name; + } + } + + match parent_slow.find_slow(name_slow).await { + Ok(found) => self.walk_slow(found, iter).await, + Err(err) => return WalkResult::Err(err), + } + } + + pub async fn follow_symlink( + &self, + symlink: ArcRef<'_, Dentry>, + inode: &InodeUse, + nr_follows: u32, + ) -> KResult> { + let mut target = [0; 256]; + let mut target = ByteBuffer::new(&mut target); + inode.readlink(&mut target).await?; + + self.walk_recursive( + &symlink.parent().clone(), + Path::new(target.data()).unwrap(), + nr_follows + 1, + ) + .await + } + + fn follow_symlink_boxed<'r, 'a: 'r, 'b: 'r, 'c: 'r>( + &'a self, + symlink: ArcRef<'b, Dentry>, + inode: &'c InodeUse, + nr_follows: u32, + ) -> Pin>> + Send + 'r>> { + Box::pin(self.follow_symlink(symlink, inode, nr_follows)) + } + + async fn walk_recursive( + &self, + cwd: &Arc, + path: &Path, + nr_follows: u32, + ) -> KResult> { + const MAX_NR_FOLLOWS: u32 = 16; + + let mut current_owned; + let mut current; + if path.is_absolute() { + current = self.fsroot.aref(); + } else { + current = cwd.aref(); + } + + let mut path_iter = path.iter(); + + loop { + match self.walk_full(current, &mut path_iter).await { + WalkResult::Err(posix_error) => return Err(posix_error.into()), + WalkResult::Ok(dentry) => return Ok(dentry), + WalkResult::Symlink { symlink, inode } => { + if nr_follows >= MAX_NR_FOLLOWS { + return Err(ELOOP); + } + + current_owned = self + .follow_symlink_boxed(symlink.aref(), &inode, nr_follows) + .await?; + current = current_owned.aref(); + } + } + } + } + + pub async fn start_recursive_walk( + &self, + cwd: &Arc, + path: &Path, + ) -> KResult> { + self.walk_recursive(cwd, path, 0).await + } +} + +impl<'a, 'b> DentryFind<'a, 'b> { + fn new(parent: &'a Dentry, name: &'b [u8]) -> Self { + let builder: BuildHasherDefault = Default::default(); + let mut hasher = builder.build_hasher(); + + hasher.write_usize(parent as *const _ as usize); + hasher.write(name); + let hash = hasher.finish() as usize; + + Self { parent, name, hash } + } +} + +impl DCacheItem for DentryFind<'_, '_> { + fn d_hash(&self) -> usize { + self.hash + } + + fn d_parent(&self) -> *const Dentry { + self.parent as *const _ + } + + fn d_name<'r, 'a: 'r, 'b: 'a>( + &'a self, + _rcu_read: &'b RCUReadLock, + ) -> impl Deref + 'r { + self.name + } +} diff --git a/src/path.rs b/src/path.rs index 8b740095..b342ef5f 100644 --- a/src/path.rs +++ b/src/path.rs @@ -1,34 +1,30 @@ use crate::{kernel::constants::ENOENT, prelude::*}; use core::fmt::{self, Debug, Formatter}; -pub struct Path<'lt> { - all: &'lt [u8], +#[repr(transparent)] +pub struct Path { + all: [u8], } pub struct PathIterator<'lt> { rem: &'lt [u8], } -#[allow(dead_code)] -impl<'lt> Path<'lt> { - pub fn new(all: &'lt [u8]) -> KResult { +impl Path { + pub fn new(all: &[u8]) -> KResult<&Self> { if all.is_empty() { Err(ENOENT) } else { - Ok(Self { all }) + Ok(unsafe { &*(all as *const [u8] as *const Path) }) } } - pub fn 
from_str(all: &'lt str) -> KResult { - Self::new(all.as_bytes()) - } - pub fn is_absolute(&self) -> bool { self.all.starts_with(&['/' as u8]) } - pub fn iter(&self) -> PathIterator<'lt> { - PathIterator::new(self.all) + pub fn iter(&self) -> PathIterator { + PathIterator::new(&self.all) } } @@ -46,11 +42,17 @@ pub enum PathComponent<'lt> { Parent, } +impl PathIterator<'_> { + pub fn is_empty(&self) -> bool { + self.rem.is_empty() + } +} + impl<'lt> Iterator for PathIterator<'lt> { type Item = PathComponent<'lt>; fn next(&mut self) -> Option { - if self.rem.is_empty() { + if self.is_empty() { return None; } @@ -71,16 +73,16 @@ impl<'lt> Iterator for PathIterator<'lt> { self.rem = rem; match cur { - cur if cur.is_empty() => Some(PathComponent::TrailingEmpty), - cur if cur == b"." => Some(PathComponent::Current), - cur if cur == b".." => Some(PathComponent::Parent), - cur => Some(PathComponent::Name(cur)), + b"" => Some(PathComponent::TrailingEmpty), + b"." => Some(PathComponent::Current), + b".." => Some(PathComponent::Parent), + name => Some(PathComponent::Name(name)), } } } -impl Debug for Path<'_> { +impl Debug for Path { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - write!(f, "Path({:?})", self.all) + write!(f, "Path({:?})", &self.all) } } diff --git a/src/rcu.rs b/src/rcu.rs index c1645d33..b06db9e2 100644 --- a/src/rcu.rs +++ b/src/rcu.rs @@ -1,21 +1,35 @@ use crate::{kernel::task::block_on, prelude::*}; use alloc::sync::Arc; +use arcref::ArcRef; use core::{ ops::Deref, ptr::NonNull, sync::atomic::{AtomicPtr, Ordering}, }; +use eonix_preempt::PreemptGuard; use eonix_runtime::scheduler::RUNTIME; -use eonix_sync::{Mutex, RwLock, RwLockReadGuard}; +use eonix_sync::{RwLock, RwLockReadGuard}; use pointers::BorrowedArc; +/// The RCU Read Lock. Holding a reference to an instance of the struct assures +/// you that any RCU protected data would not be dropped. +/// +/// The struct cannot be created directly. Instead, use [`rcu_read_lock()`]. 
+#[derive(Debug)] +pub struct RCUReadLock(); + +pub struct RCUReadGuardNew { + guard: RwLockReadGuard<'static, RCUReadLock>, + _disable_preempt: PreemptGuard<()>, +} + pub struct RCUReadGuard<'data, T: 'data> { value: T, - _guard: RwLockReadGuard<'data, ()>, + _guard: RwLockReadGuard<'static, RCUReadLock>, _phantom: PhantomData<&'data T>, } -static GLOBAL_RCU_SEM: RwLock<()> = RwLock::new(()); +static GLOBAL_RCU_SEM: RwLock = RwLock::new(RCUReadLock()); impl<'data, T> RCUReadGuard<'data, BorrowedArc<'data, T>> { fn lock(value: BorrowedArc<'data, T>) -> Self { @@ -25,14 +39,6 @@ impl<'data, T> RCUReadGuard<'data, BorrowedArc<'data, T>> { _phantom: PhantomData, } } - - pub fn borrow(&self) -> BorrowedArc<'data, T> { - unsafe { - BorrowedArc::from_raw(NonNull::new_unchecked( - &raw const *self.value.borrow() as *mut T - )) - } - } } impl<'data, T: 'data> Deref for RCUReadGuard<'data, T> { @@ -63,17 +69,14 @@ pub trait RCUNode { pub struct RCUList> { head: AtomicPtr, - - reader_lock: RwLock<()>, - update_lock: Mutex<()>, + update_lock: Spin<()>, } impl> RCUList { pub const fn new() -> Self { Self { head: AtomicPtr::new(core::ptr::null_mut()), - reader_lock: RwLock::new(()), - update_lock: Mutex::new(()), + update_lock: Spin::new(()), } } @@ -117,7 +120,6 @@ impl> RCUList { unsafe { Arc::from_raw(me) }; } - let _lck = self.reader_lock.write(); node.rcu_prev() .store(core::ptr::null_mut(), Ordering::Release); node.rcu_next() @@ -152,7 +154,6 @@ impl> RCUList { unsafe { Arc::from_raw(old) }; } - let _lck = self.reader_lock.write(); old_node .rcu_prev() .store(core::ptr::null_mut(), Ordering::Release); @@ -161,36 +162,36 @@ impl> RCUList { .store(core::ptr::null_mut(), Ordering::Release); } - pub fn iter(&self) -> RCUIterator { - let _lck = block_on(self.reader_lock.read()); - + pub fn iter<'a, 'r>(&'a self, _lock: &'r RCUReadLock) -> RCUIterator<'a, 'r, T> { RCUIterator { - // SAFETY: We have a read lock, so the node is still alive. - cur: NonNull::new(self.head.load(Ordering::SeqCst)), - _lock: _lck, + cur: NonNull::new(self.head.load(Ordering::Acquire)), + _phantom: PhantomData, } } } -pub struct RCUIterator<'lt, T: RCUNode> { +pub struct RCUIterator<'list, 'rcu, T: RCUNode> { cur: Option>, - _lock: RwLockReadGuard<'lt, ()>, + _phantom: PhantomData<(&'list (), &'rcu ())>, } -impl<'lt, T: RCUNode> Iterator for RCUIterator<'lt, T> { - type Item = BorrowedArc<'lt, T>; +impl<'rcu, T: RCUNode> Iterator for RCUIterator<'_, 'rcu, T> { + type Item = ArcRef<'rcu, T>; fn next(&mut self) -> Option { - match self.cur { - None => None, - Some(pointer) => { - // SAFETY: We have a read lock, so the node is still alive. - let reference = unsafe { pointer.as_ref() }; + self.cur.map(|pointer| { + let reference = unsafe { + // SAFETY: We have the read lock so the node is still alive. + pointer.as_ref() + }; + + self.cur = NonNull::new(reference.rcu_next().load(Ordering::Acquire)); - self.cur = NonNull::new(reference.rcu_next().load(Ordering::SeqCst)); - Some(unsafe { BorrowedArc::from_raw(pointer) }) + unsafe { + // SAFETY: We have the read lock so the node is still alive. 
+ ArcRef::new_unchecked(pointer.as_ptr()) } - } + }) } } @@ -228,15 +229,16 @@ where } pub fn load<'lt>(&self) -> Option>> { + // BUG: We should acquire the lock before loading the pointer NonNull::new(self.0.load(Ordering::Acquire)) .map(|p| RCUReadGuard::lock(unsafe { BorrowedArc::from_raw(p) })) } - pub fn load_protected<'a, U: 'a>( - &self, - _guard: &RCUReadGuard<'a, U>, - ) -> Option> { - NonNull::new(self.0.load(Ordering::Acquire)).map(|p| unsafe { BorrowedArc::from_raw(p) }) + pub fn dereference<'r, 'a: 'r>(&self, _lock: &'a RCUReadLock) -> Option> { + NonNull::new(self.0.load(Ordering::Acquire)).map(|p| unsafe { + // SAFETY: We have a read lock, so the node is still alive. + ArcRef::new_unchecked(p.as_ptr()) + }) } /// # Safety @@ -289,3 +291,18 @@ where } } } + +impl Deref for RCUReadGuardNew { + type Target = RCUReadLock; + + fn deref(&self) -> &Self::Target { + &self.guard + } +} + +pub fn rcu_read_lock() -> RCUReadGuardNew { + RCUReadGuardNew { + guard: block_on(GLOBAL_RCU_SEM.read()), + _disable_preempt: PreemptGuard::new(()), + } +} From 49a49032edc3d071092ec1fdb5f3b9aa070e107b Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sun, 14 Sep 2025 22:22:51 +0800 Subject: [PATCH 03/25] style, vfs: remove unused imports and InodeRef Signed-off-by: greatbridf --- src/kernel/vfs/inode/inode.rs | 5 ----- src/kernel/vfs/inode/mod.rs | 4 +--- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/src/kernel/vfs/inode/inode.rs b/src/kernel/vfs/inode/inode.rs index 786a31fe..870a045d 100644 --- a/src/kernel/vfs/inode/inode.rs +++ b/src/kernel/vfs/inode/inode.rs @@ -321,11 +321,6 @@ pub struct InodeInfo { pub mtime: Instant, } -#[derive(Clone)] -pub struct InodeRef(Weak) -where - I: Inode + ?Sized; - pub struct InodeUse(Arc) where I: Inode + ?Sized; diff --git a/src/kernel/vfs/inode/mod.rs b/src/kernel/vfs/inode/mod.rs index 786d30fa..08471ef3 100644 --- a/src/kernel/vfs/inode/mod.rs +++ b/src/kernel/vfs/inode/mod.rs @@ -4,7 +4,5 @@ mod ops; mod statx; pub use ino::Ino; -pub use inode::{ - Inode, InodeDir, InodeDirOps, InodeFile, InodeFileOps, InodeInfo, InodeOps, InodeRef, InodeUse, -}; +pub use inode::{Inode, InodeDirOps, InodeFileOps, InodeInfo, InodeOps, InodeUse}; pub use ops::{RenameData, WriteOffset}; From 632f1c7882876b7305b90ca5d162172d9e5350b1 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sun, 14 Sep 2025 22:23:09 +0800 Subject: [PATCH 04/25] vfs: fix debug print of `Mode` structs If we have invalid format, we should print a None instead of panicking. 
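
A rough sketch of the new behavior (illustrative only; it assumes
`Mode::new` takes the raw mode bits, and uses FIFO format bits as an
example of a format `try_format()` does not recognize):

    // S_IFIFO (0o010000) is not a format we handle, so `try_format()`
    // yields `None` where the old `format()` would have panicked.
    let mode = Mode::new(0o010644);
    assert!(mode.try_format().is_none());

    // The Debug impl now prints something like
    // `Mode(None, Permission(0o644))` instead of panicking.
    println_debug!("{:?}", mode);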
Signed-off-by: greatbridf --- src/kernel/vfs/types/mode.rs | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/src/kernel/vfs/types/mode.rs b/src/kernel/vfs/types/mode.rs index dc1b88ec..a58c8215 100644 --- a/src/kernel/vfs/types/mode.rs +++ b/src/kernel/vfs/types/mode.rs @@ -44,13 +44,20 @@ impl Mode { } pub fn format(&self) -> Format { + match self.try_format() {
+ None => panic!("unknown format bits: {:#o}", self.format_bits()),
+ Some(format) => format,
+ }
+ }
+
+ pub fn try_format(&self) -> Option<Format> { match self.format_bits() { - S_IFREG => Format::REG, - S_IFDIR => Format::DIR, - S_IFLNK => Format::LNK, - S_IFBLK => Format::BLK, - S_IFCHR => Format::CHR, - _ => panic!("unknown format bits: {:#o}", self.format_bits()), + S_IFREG => Some(Format::REG), + S_IFDIR => Some(Format::DIR), + S_IFLNK => Some(Format::LNK), + S_IFBLK => Some(Format::BLK), + S_IFCHR => Some(Format::CHR), + _ => None, } } @@ -58,10 +65,6 @@ impl Mode { Permission::new(self.non_format_bits()) } - pub const fn non_format(&self) -> Self { - Self::new(self.non_format_bits()) - } - pub const fn set_perm(&mut self, perm: Permission) { self.0 = self.format_bits() | perm.bits(); } @@ -100,15 +103,15 @@ impl core::fmt::Debug for Mode { match self.non_format_bits() & !0o777 { 0 => write!( f, - "Mode({format:?}, {perm:#o})", - format = self.format(), - perm = self.non_format_bits() + "Mode({format:?}, {perm:?})", + format = self.try_format(), + perm = Permission::new(self.non_format_bits()), )?, rem => write!( f, - "Mode({format:?}, {perm:#o}, rem={rem:#x})", - format = self.format(), - perm = self.non_format_bits() & 0o777 + "Mode({format:?}, {perm:?}, rem={rem:#x})", + format = self.try_format(), + perm = Permission::new(self.non_format_bits()) )?, } From 210a6693c73e5f38fa9ddacde82018df1712afc8 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Wed, 8 Oct 2025 11:10:13 +0800 Subject: [PATCH 05/25] mem, paging: introduce page locks and exclusive pages Introduce a new page locking mechanism to ensure exclusive access to pages while we operate on them. The underlying locks are not implemented yet, since the paging structs will be reworked over the next few patches. Introduce a new `PageExcl` struct representing a page that conforms to Rust's ownership rules. An exclusively owned page can be accessed without taking the page lock. Remove the `MemoryBlock` structs, as they are hard to use and carry almost no semantic meaning.
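
Roughly, the intended usage of the new types looks like this (a sketch
based on the `PageExt` and `PageExcl` APIs introduced in paging.rs, not
code taken from the tree):

    // An exclusively owned page obeys Rust's ownership rules, so its
    // contents can be read and written without taking the page lock.
    let mut excl = PageExcl::zeroed();
    excl.as_bytes_mut()[..4].copy_from_slice(b"data");

    // Once converted into a shared `Page`, accesses have to go through
    // `PageExt::lock()` (currently still a no-op, see the TODO in
    // `impl PageExt for Page`).
    let shared: Page = excl.into_page();
    let _first_byte = shared.lock().as_bytes()[0];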
Signed-off-by: greatbridf --- .rustfmt.toml | 4 +- Cargo.lock | 7 + Cargo.toml | 1 + src/driver/ahci/command_table.rs | 61 ++++--- src/driver/ahci/mod.rs | 42 +++-- src/driver/ahci/port.rs | 104 +++++------- src/driver/ahci/slot.rs | 179 +++++++++++++++------ src/driver/e1000e.rs | 135 ++++++---------- src/driver/virtio/virtio_blk.rs | 55 +++---- src/fs/fat32.rs | 64 +++----- src/io.rs | 30 ++-- src/kernel/block.rs | 222 +++++++------------------- src/kernel/mem.rs | 4 +- src/kernel/mem/access.rs | 118 +------------- src/kernel/mem/allocator.rs | 8 +- src/kernel/mem/mm_area.rs | 52 +++--- src/kernel/mem/mm_list.rs | 39 ++--- src/kernel/mem/page_alloc/raw_page.rs | 9 +- src/kernel/mem/page_cache.rs | 37 ++--- src/kernel/mem/paging.rs | 129 ++++++++++++--- src/kernel/vfs/file/mod.rs | 34 ++-- src/lib.rs | 50 +++--- 22 files changed, 633 insertions(+), 751 deletions(-) diff --git a/.rustfmt.toml b/.rustfmt.toml index d69872c6..17b2bbc5 100644 --- a/.rustfmt.toml +++ b/.rustfmt.toml @@ -29,8 +29,8 @@ fn_single_line = false where_single_line = false imports_indent = "Block" imports_layout = "Mixed" -imports_granularity = "Preserve" -group_imports = "Preserve" +imports_granularity = "Module" +group_imports = "StdExternalCrate" reorder_imports = true reorder_modules = true reorder_impl_items = false diff --git a/Cargo.lock b/Cargo.lock index f85a9d2f..c70190a0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -180,6 +180,7 @@ dependencies = [ "posix_types", "slab_allocator", "stalloc", + "static_assertions", "unwinding", "virtio-drivers", "xmas-elf", @@ -525,6 +526,12 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a37f0ead4094eeb54c6893316aa139e48b252f1c07511e5124fa1f9414df5b6c" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "syn" version = "2.0.104" diff --git a/Cargo.toml b/Cargo.toml index 5158025e..dca5d34d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,6 +42,7 @@ futures = { version = "0.3.31", features = [ "alloc", "async-await", ], default-features = false } +static_assertions = "1.1.0" [target.'cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))'.dependencies] virtio-drivers = { version = "0.11.0" } diff --git a/src/driver/ahci/command_table.rs b/src/driver/ahci/command_table.rs index c77b4abd..7b78d26f 100644 --- a/src/driver/ahci/command_table.rs +++ b/src/driver/ahci/command_table.rs @@ -1,45 +1,54 @@ -use super::{command::Command, PRDTEntry, FISH2D}; -use crate::kernel::mem::{AsMemoryBlock as _, Page}; +use core::ptr::NonNull; + use eonix_mm::address::PAddr; -pub struct CommandTable<'a> { - page: Page, - command_fis: &'a mut FISH2D, +use super::command::Command; +use super::{PRDTEntry, FISH2D}; +use crate::kernel::mem::{Page, PageExt}; - prdt: &'a mut [PRDTEntry; 248], - prdt_entries: Option, +pub struct CommandTable { + page: Page, + cmd_fis: NonNull, + prdt: NonNull<[PRDTEntry; 248]>, + prdt_entries: usize, } -impl CommandTable<'_> { +unsafe impl Send for CommandTable {} +unsafe impl Sync for CommandTable {} + +impl CommandTable { pub fn new() -> Self { let page = Page::alloc(); - let memory = page.as_memblk(); - - let (lhs, prdt) = memory.split_at(0x80); - - let (command_fis, _) = lhs.split_at(size_of::()); - let command_fis = unsafe { command_fis.as_ptr().as_mut() }; - let prdt = unsafe { prdt.as_ptr().as_mut() }; - - Self { - page, - command_fis, - 
prdt, - prdt_entries: None, + let base = page.get_ptr(); + + unsafe { + Self { + page, + cmd_fis: base.cast(), + prdt: base.byte_add(0x80).cast(), + prdt_entries: 0, + } } } pub fn setup(&mut self, cmd: &impl Command) { - self.command_fis.setup(cmd.cmd(), cmd.lba(), cmd.count()); - self.prdt_entries = Some(cmd.pages().len() as u16); + unsafe { + self.cmd_fis + .as_mut() + .setup(cmd.cmd(), cmd.lba(), cmd.count()); + } + + self.prdt_entries = cmd.pages().len(); for (idx, page) in cmd.pages().iter().enumerate() { - self.prdt[idx].setup(page); + unsafe { + self.prdt.as_mut()[idx].setup(page); + } } } - pub fn prdt_len(&self) -> u16 { - self.prdt_entries.unwrap() + pub fn prdt_len(&self) -> usize { + self.prdt_entries } pub fn base(&self) -> PAddr { diff --git a/src/driver/ahci/mod.rs b/src/driver/ahci/mod.rs index ab405829..3ea44ed3 100644 --- a/src/driver/ahci/mod.rs +++ b/src/driver/ahci/mod.rs @@ -1,25 +1,23 @@ -use crate::{ - fs::procfs, - io::Buffer as _, - kernel::{ - block::BlockDevice, - constants::{EINVAL, EIO}, - interrupt::register_irq_handler, - pcie::{self, Header, PCIDevice, PCIDriver, PciError}, - vfs::types::DeviceId, - }, - prelude::*, -}; -use alloc::{format, sync::Arc}; +use alloc::format; +use alloc::sync::Arc; + use async_trait::async_trait; use control::AdapterControl; use defs::*; use eonix_mm::address::{AddrOps as _, PAddr}; use eonix_sync::SpinIrq as _; use port::AdapterPort; - pub(self) use register::Register; +use crate::fs::procfs; +use crate::io::Buffer as _; +use crate::kernel::block::BlockDevice; +use crate::kernel::constants::{EINVAL, EIO}; +use crate::kernel::interrupt::register_irq_handler; +use crate::kernel::pcie::{self, Header, PCIDevice, PCIDriver, PciError}; +use crate::kernel::vfs::types::DeviceId; +use crate::prelude::*; + mod command; mod command_table; mod control; @@ -30,7 +28,7 @@ pub(self) mod slot; mod stats; pub struct AHCIDriver { - devices: Spin>>>, + devices: Spin>>, } pub struct BitsIterator { @@ -64,22 +62,22 @@ impl Iterator for BitsIterator { } } -struct Device<'a> { +struct Device { control_base: PAddr, control: AdapterControl, _pcidev: Arc>, /// # Lock /// Might be accessed from irq handler, use with `lock_irq()` - ports: Spin<[Option>>; 32]>, + ports: Spin<[Option>; 32]>, } /// # Safety /// `pcidev` is never accessed from Rust code /// TODO!!!: place *mut pci_device in a safe wrapper -unsafe impl Send for Device<'_> {} -unsafe impl Sync for Device<'_> {} +unsafe impl Send for Device {} +unsafe impl Sync for Device {} -impl Device<'_> { +impl Device { fn handle_interrupt(&self) { // Safety // `self.ports` is accessed inside irq handler @@ -108,8 +106,8 @@ impl Device<'_> { } } -impl Device<'static> { - async fn probe_port(&self, port: Arc>) -> KResult<()> { +impl Device { + async fn probe_port(&self, port: Arc) -> KResult<()> { port.init().await?; { diff --git a/src/driver/ahci/port.rs b/src/driver/ahci/port.rs index 77286ec5..a54bbbba 100644 --- a/src/driver/ahci/port.rs +++ b/src/driver/ahci/port.rs @@ -1,20 +1,18 @@ +use alloc::collections::vec_deque::VecDeque; +use core::task::{Poll, Waker}; + +use async_trait::async_trait; +use eonix_mm::address::{Addr as _, PAddr}; +use eonix_sync::SpinIrq as _; + use super::command::{Command, IdentifyCommand, ReadLBACommand, WriteLBACommand}; -use super::slot::CommandSlot; +use super::slot::CommandList; use super::stats::AdapterPortStats; -use super::{ - CommandHeader, Register, PORT_CMD_CR, PORT_CMD_FR, PORT_CMD_FRE, PORT_CMD_ST, PORT_IE_DEFAULT, -}; +use super::{Register, PORT_CMD_CR, 
PORT_CMD_FR, PORT_CMD_FRE, PORT_CMD_ST, PORT_IE_DEFAULT}; use crate::driver::ahci::command_table::CommandTable; use crate::kernel::block::{BlockDeviceRequest, BlockRequestQueue}; use crate::kernel::constants::{EINVAL, EIO}; -use crate::kernel::mem::paging::Page; -use crate::kernel::mem::AsMemoryBlock as _; use crate::prelude::*; -use alloc::collections::vec_deque::VecDeque; -use async_trait::async_trait; -use core::pin::pin; -use eonix_mm::address::{Addr as _, PAddr}; -use eonix_sync::{SpinIrq as _, WaitList}; /// An `AdapterPort` is an HBA device in AHCI mode. /// @@ -55,6 +53,8 @@ pub struct AdapterPortData { struct FreeList { free: VecDeque, working: VecDeque, + + wakers: VecDeque, } impl FreeList { @@ -62,57 +62,32 @@ impl FreeList { Self { free: (0..32).collect(), working: VecDeque::new(), + wakers: VecDeque::new(), } } } -pub struct AdapterPort<'a> { +pub struct AdapterPort { pub nport: u32, regs_base: PAddr, - slots: [CommandSlot<'a>; 32], + cmdlist: CommandList, free_list: Spin, - free_list_wait: WaitList, - - /// Holds the command list. - /// **DO NOT USE IT DIRECTLY** - _page: Page, - - cmdlist_base: PAddr, - fis_base: PAddr, stats: AdapterPortStats, } -impl<'a> AdapterPort<'a> { +impl AdapterPort { pub fn new(base: PAddr, nport: u32) -> Self { - let page = Page::alloc(); - let cmdlist_base = page.start(); - let cmdlist_size = 32 * size_of::(); - let fis_base = cmdlist_base + cmdlist_size; - - let (mut cmdheaders, _) = page.as_memblk().split_at(cmdlist_size); - let slots = core::array::from_fn(move |_| { - let (cmdheader, next) = cmdheaders.split_at(size_of::()); - cmdheaders = next; - CommandSlot::new(unsafe { cmdheader.as_ptr().as_mut() }) - }); - Self { nport, regs_base: base + 0x100 + 0x80 * nport as usize, - slots, + cmdlist: CommandList::new(), free_list: Spin::new(FreeList::new()), - free_list_wait: WaitList::new(), - _page: page, stats: AdapterPortStats::new(), - cmdlist_base, - fis_base, } } -} -impl AdapterPort<'_> { fn command_list_base(&self) -> Register { Register::new(self.regs_base + 0x00) } @@ -146,25 +121,16 @@ impl AdapterPort<'_> { } async fn get_free_slot(&self) -> u32 { - loop { - let mut wait = pin!(self.free_list_wait.prepare_to_wait()); - - { - let mut free_list = self.free_list.lock_irq(); - - if let Some(slot) = free_list.free.pop_front() { - return slot; - } - - wait.as_mut().add_to_wait_list(); - - if let Some(slot) = free_list.free.pop_front() { - return slot; - } + core::future::poll_fn(|ctx| { + let mut free_list = self.free_list.lock_irq(); + if let Some(slot) = free_list.free.pop_front() { + return Poll::Ready(slot); } - wait.await; - } + free_list.wakers.push_back(ctx.waker().clone()); + Poll::Pending + }) + .await } fn save_working(&self, slot: u32) { @@ -172,8 +138,10 @@ impl AdapterPort<'_> { } fn release_free_slot(&self, slot: u32) { - self.free_list.lock_irq().free.push_back(slot); - self.free_list_wait.notify_one(); + let mut free_list = self.free_list.lock_irq(); + + free_list.free.push_back(slot); + free_list.wakers.drain(..).for_each(|waker| waker.wake()); } pub fn handle_interrupt(&self) { @@ -187,7 +155,7 @@ impl AdapterPort<'_> { return true; } - self.slots[n as usize].handle_irq(); + self.cmdlist.get(n as usize).handle_irq(); self.stats.inc_int_fired(); false @@ -216,7 +184,7 @@ impl AdapterPort<'_> { cmdtable.setup(cmd); let slot_index = self.get_free_slot().await; - let slot = &self.slots[slot_index as usize]; + let slot = self.cmdlist.get(slot_index as usize); slot.prepare_command(&cmdtable, cmd.write()); 
self.save_working(slot_index); @@ -229,10 +197,9 @@ impl AdapterPort<'_> { self.stats.inc_cmd_sent(); - if let Err(_) = slot.wait_finish().await { + slot.wait_finish().await.inspect_err(|_| { self.stats.inc_cmd_error(); - return Err(EIO); - }; + })?; self.release_free_slot(slot_index); Ok(()) @@ -251,8 +218,9 @@ impl AdapterPort<'_> { self.stop_command()?; self.command_list_base() - .write(self.cmdlist_base.addr() as u64); - self.fis_base().write(self.fis_base.addr() as u64); + .write(self.cmdlist.cmdlist_base().addr() as u64); + self.fis_base() + .write(self.cmdlist.recv_fis_base().addr() as u64); self.interrupt_enable().write_once(PORT_IE_DEFAULT); @@ -277,7 +245,7 @@ impl AdapterPort<'_> { } #[async_trait] -impl BlockRequestQueue for AdapterPort<'_> { +impl BlockRequestQueue for AdapterPort { fn max_request_pages(&self) -> u64 { 1024 } diff --git a/src/driver/ahci/slot.rs b/src/driver/ahci/slot.rs index 60a66de3..06c6f2ec 100644 --- a/src/driver/ahci/slot.rs +++ b/src/driver/ahci/slot.rs @@ -1,20 +1,37 @@ -use super::{command_table::CommandTable, CommandHeader}; +use core::cell::UnsafeCell; +use core::ptr::NonNull; +use core::task::{Poll, Waker}; + +use eonix_mm::address::{Addr as _, PAddr}; +use eonix_sync::{Spin, SpinIrq as _}; + +use super::command_table::CommandTable; +use super::CommandHeader; +use crate::kernel::constants::EIO; +use crate::kernel::mem::paging::AllocZeroed; +use crate::kernel::mem::{Page, PageExt}; use crate::KResult; -use core::pin::pin; -use eonix_mm::address::Addr as _; -use eonix_sync::{Spin, SpinIrq as _, WaitList}; + +pub struct CommandList { + base: NonNull, + _page: Page, +} + +unsafe impl Send for CommandList {} +unsafe impl Sync for CommandList {} pub struct CommandSlot<'a> { - /// # Usage - /// `inner.cmdheader` might be used in irq handler. So in order to wait for - /// commands to finish, we should use `lock_irq` on `inner` - inner: Spin>, - wait_list: WaitList, + cmdheader: &'a UnsafeCell, + /// [`Self::control`] might be used in irq handlers. + control: &'a Spin, } -struct CommandSlotInner<'a> { +unsafe impl Send for CommandSlot<'_> {} +unsafe impl Sync for CommandSlot<'_> {} + +struct SlotControl { state: SlotState, - cmdheader: &'a mut CommandHeader, + waker: Option, } #[derive(Debug, PartialEq, Eq, Clone, Copy)] @@ -25,32 +42,103 @@ enum SlotState { Error, } -impl<'a> CommandSlot<'a> { - pub fn new(cmdheader: &'a mut CommandHeader) -> Self { +impl CommandList { + fn cmdheaders(&self) -> &[UnsafeCell; 32] { + unsafe { self.base.cast().as_ref() } + } + + fn controls_ptr(base: NonNull) -> NonNull> { + // 24 bytes for SlotControl and extra 8 bytes for Spin. 
+ const_assert_eq!(size_of::>(), 32); + + unsafe { base.add(size_of::>() * 32).cast() } + } + + fn controls(&self) -> &[Spin; 32] { + unsafe { Self::controls_ptr(self.base).cast().as_ref() } + } + + pub fn cmdlist_base(&self) -> PAddr { + self._page.start() + } + + pub fn recv_fis_base(&self) -> PAddr { + self._page.start() + + (size_of::>() + size_of::>()) * 32 + } + + pub fn get(&self, index: usize) -> CommandSlot { + CommandSlot { + cmdheader: &self.cmdheaders()[index], + control: &self.controls()[index], + } + } + + pub fn new() -> Self { + let page = Page::zeroed(); + let base = page.get_ptr(); + + let controls_ptr = Self::controls_ptr(base); + + for i in 0..32 { + unsafe { + controls_ptr.add(i).write(Spin::new(SlotControl { + state: SlotState::Idle, + waker: None, + })); + } + } + Self { - inner: Spin::new(CommandSlotInner { - state: SlotState::Idle, - cmdheader, - }), - wait_list: WaitList::new(), + base: page.get_ptr(), + _page: page, } } +} +impl Drop for CommandList { + fn drop(&mut self) { + let controls_ptr = Self::controls_ptr(self.base); + + for i in 0..32 { + unsafe { + controls_ptr.add(i).drop_in_place(); + } + } + } +} + +impl CommandSlot<'_> { pub fn handle_irq(&self) { - let mut inner = self.inner.lock(); - debug_assert_eq!(inner.state, SlotState::Working); + // We are already in the IRQ handler. + let mut control = self.control.lock(); + assert_eq!(control.state, SlotState::Working); + + let cmdheader = unsafe { + // SAFETY: The IRQ handler is only called after the command + // is finished. + &mut *self.cmdheader.get() + }; // TODO: Check errors. - inner.state = SlotState::Finished; - inner.cmdheader.bytes_transferred = 0; - inner.cmdheader.prdt_length = 0; + cmdheader.bytes_transferred = 0; + cmdheader.prdt_length = 0; - self.wait_list.notify_all(); + control.state = SlotState::Finished; + + if let Some(waker) = control.waker.take() { + waker.wake(); + } } pub fn prepare_command(&self, cmdtable: &CommandTable, write: bool) { - let mut inner = self.inner.lock_irq(); - let cmdheader = &mut inner.cmdheader; + let mut control = self.control.lock_irq(); + assert_eq!(control.state, SlotState::Idle); + + let cmdheader = unsafe { + // SAFETY: We are in the idle state. + &mut *self.cmdheader.get() + }; cmdheader.first = 0x05; // FIS type @@ -60,36 +148,37 @@ impl<'a> CommandSlot<'a> { cmdheader.second = 0x00; - cmdheader.prdt_length = cmdtable.prdt_len(); + cmdheader.prdt_length = cmdtable.prdt_len() as u16; cmdheader.bytes_transferred = 0; cmdheader.command_table_base = cmdtable.base().addr() as u64; cmdheader._reserved = [0; 4]; - inner.state = SlotState::Working; + control.state = SlotState::Working; } pub async fn wait_finish(&self) -> KResult<()> { - let mut inner = loop { - let mut wait = pin!(self.wait_list.prepare_to_wait()); - - { - let inner = self.inner.lock_irq(); - if inner.state != SlotState::Working { - break inner; + core::future::poll_fn(|ctx| { + let mut control = self.control.lock_irq(); + + match control.state { + SlotState::Idle => unreachable!("Poll called in idle state"), + SlotState::Working => { + control.waker = Some(ctx.waker().clone()); + Poll::Pending } - wait.as_mut().add_to_wait_list(); + SlotState::Finished => { + control.state = SlotState::Idle; + Poll::Ready(Ok(())) + } + SlotState::Error => { + control.state = SlotState::Idle; - if inner.state != SlotState::Working { - break inner; + // TODO: Report errors. 
+ Poll::Ready(Err(EIO)) } } - - wait.await; - }; - - inner.state = SlotState::Idle; - - Ok(()) + }) + .await } } diff --git a/src/driver/e1000e.rs b/src/driver/e1000e.rs index f362f477..73143c2c 100644 --- a/src/driver/e1000e.rs +++ b/src/driver/e1000e.rs @@ -1,19 +1,18 @@ -use crate::kernel::constants::{EAGAIN, EFAULT, EINVAL, EIO}; -use crate::kernel::interrupt::register_irq_handler; -use crate::kernel::mem::paging::{self, AllocZeroed}; -use crate::kernel::mem::{AsMemoryBlock, PhysAccess}; -use crate::kernel::pcie::{self, Header, PCIDevice, PCIDriver, PciError}; -use crate::net::netdev; -use crate::prelude::*; use alloc::boxed::Box; use alloc::sync::Arc; -use alloc::vec::Vec; -use async_trait::async_trait; use core::ptr::NonNull; + +use async_trait::async_trait; use eonix_hal::fence::memory_barrier; use eonix_mm::address::{Addr, PAddr}; use eonix_sync::SpinIrq; -use paging::Page; + +use crate::kernel::constants::{EAGAIN, EFAULT, EINVAL, EIO}; +use crate::kernel::interrupt::register_irq_handler; +use crate::kernel::mem::{PageExcl, PageExt, PhysAccess}; +use crate::kernel::pcie::{self, Header, PCIDevice, PCIDriver, PciError}; +use crate::net::netdev; +use crate::prelude::*; mod defs; @@ -55,13 +54,13 @@ struct E1000eDev { id: u32, regs: Registers, - rt_desc_page: Page, + rt_desc_page: PageExcl, rx_head: Option, rx_tail: Option, tx_tail: Option, - rx_buffers: Option>>, - tx_buffers: Option>>, + rx_buffers: Box<[PageExcl; RX_DESC_SIZE]>, + tx_buffers: Box<[Option; TX_DESC_SIZE]>, } fn test(val: u32, bit: u32) -> bool { @@ -196,7 +195,7 @@ impl netdev::Netdev for E1000eDev { break; } - let ref mut desc = self.rx_desc_table()[next_tail as usize]; + let desc = unsafe { &mut self.rx_desc_table()[next_tail as usize] }; if !test(desc.status as u32, defs::RXD_STAT_DD as u32) { Err(EIO)?; } @@ -204,11 +203,8 @@ impl netdev::Netdev for E1000eDev { desc.status = 0; let len = desc.length as usize; - let buffers = self.rx_buffers.as_mut().ok_or(EIO)?; - let data = unsafe { - // SAFETY: No one could be writing to the buffer at this point. - &buffers[next_tail as usize].as_memblk().as_bytes()[..len] - }; + let buffer = &self.rx_buffers[next_tail as usize]; + let data = &buffer.as_bytes()[..len]; println_debug!("e1000e: received {len} bytes, {:?}", PrintableBytes(data)); self.rx_tail = Some(next_tail); @@ -226,20 +222,17 @@ impl netdev::Netdev for E1000eDev { return Err(EAGAIN); } - let ref mut desc = self.tx_desc_table()[tail as usize]; + let desc = unsafe { &mut self.tx_desc_table()[tail as usize] }; if !test(desc.status as u32, defs::TXD_STAT_DD as u32) { return Err(EIO); } - let buffer_page = Page::alloc(); + let mut buffer_page = PageExcl::alloc(); if buf.len() > buffer_page.len() { return Err(EFAULT); } - unsafe { - // SAFETY: We are the only one writing to this memory block. 
- buffer_page.as_memblk().as_bytes_mut()[..buf.len()].copy_from_slice(buf); - } + buffer_page.as_bytes_mut()[..buf.len()].copy_from_slice(buf); desc.buffer = PAddr::from(buffer_page.pfn()).addr() as u64; desc.length = buf.len() as u16; @@ -249,9 +242,8 @@ impl netdev::Netdev for E1000eDev { self.tx_tail = Some(next_tail); self.regs.write(defs::REG_TDT, next_tail); - // TODO: check if the packets are sent and update self.tx_head state - - Ok(()) + unimplemented!("Check if the packets are sent and update self.tx_head state"); + // Ok(()) } } @@ -324,26 +316,26 @@ impl E1000eDev { Ok(()) } - fn reset(&self) -> Result<(), u32> { + fn reset(regs: &Registers) -> Result<(), u32> { // disable interrupts so we won't mess things up - self.regs.write(defs::REG_IMC, 0xffffffff); + regs.write(defs::REG_IMC, 0xffffffff); - let ctrl = self.regs.read(defs::REG_CTRL); - self.regs.write(defs::REG_CTRL, ctrl | defs::CTRL_GIOD); + let ctrl = regs.read(defs::REG_CTRL); + regs.write(defs::REG_CTRL, ctrl | defs::CTRL_GIOD); - while self.regs.read(defs::REG_STAT) & defs::STAT_GIOE != 0 { + while regs.read(defs::REG_STAT) & defs::STAT_GIOE != 0 { // wait for link up } - let ctrl = self.regs.read(defs::REG_CTRL); - self.regs.write(defs::REG_CTRL, ctrl | defs::CTRL_RST); + let ctrl = regs.read(defs::REG_CTRL); + regs.write(defs::REG_CTRL, ctrl | defs::CTRL_RST); - while self.regs.read(defs::REG_CTRL) & defs::CTRL_RST != 0 { + while regs.read(defs::REG_CTRL) & defs::CTRL_RST != 0 { // wait for reset } // disable interrupts again - self.regs.write(defs::REG_IMC, 0xffffffff); + regs.write(defs::REG_IMC, 0xffffffff); Ok(()) } @@ -360,64 +352,45 @@ impl E1000eDev { Ok(()) } - pub fn new(base: PAddr, irq_no: usize) -> Result { - let page = Page::zeroed(); + pub fn new(base: PAddr, irq_no: usize) -> KResult { + let regs = Registers::new(base); + Self::reset(®s)?; - let mut dev = Self { + let dev = Self { irq_no, - mac: [0; 6], + mac: regs.read_as(0x5400), status: netdev::LinkStatus::Down, speed: netdev::LinkSpeed::SpeedUnknown, id: netdev::alloc_id(), - regs: Registers::new(base), - rt_desc_page: page, + regs, + rt_desc_page: PageExcl::zeroed(), rx_head: None, rx_tail: None, tx_tail: None, - rx_buffers: None, - tx_buffers: None, + rx_buffers: Box::new(core::array::from_fn(|_| PageExcl::alloc_order(2))), + tx_buffers: Box::new([const { None }; 32]), }; - dev.reset()?; - - dev.mac = dev.regs.read_as(0x5400); - dev.tx_buffers = Some(Box::new(Vec::with_capacity(TX_DESC_SIZE))); - - let mut rx_buffers = Box::new(Vec::with_capacity(RX_DESC_SIZE)); - - for index in 0..RX_DESC_SIZE { - let page = Page::alloc_order(2); - - let ref mut desc = dev.rx_desc_table()[index]; - desc.buffer = PAddr::from(page.pfn()).addr() as u64; - desc.status = 0; - - rx_buffers.push(page); - } + unsafe { + for (desc, page) in dev.rx_desc_table().into_iter().zip(dev.rx_buffers.iter()) { + desc.buffer = page.start().addr() as u64; + desc.status = 0; + } - for index in 0..TX_DESC_SIZE { - let ref mut desc = dev.tx_desc_table()[index]; - desc.status = defs::TXD_STAT_DD; + for desc in dev.tx_desc_table() { + desc.status = defs::TXD_STAT_DD; + } } - dev.rx_buffers = Some(rx_buffers); - Ok(dev) } - fn rx_desc_table(&self) -> &mut [RxDescriptor; RX_DESC_SIZE] { - unsafe { - // SAFETY: TODO - self.rt_desc_page.as_memblk().as_ptr().as_mut() - } + unsafe fn rx_desc_table(&self) -> &mut [RxDescriptor; RX_DESC_SIZE] { + self.rt_desc_page.get_ptr().cast().as_mut() } - fn tx_desc_table(&self) -> &mut [TxDescriptor; TX_DESC_SIZE] { - let (_, right) = 
self.rt_desc_page.as_memblk().split_at(0x200); - unsafe { - // SAFETY: TODO - right.as_ptr().as_mut() - } + unsafe fn tx_desc_table(&self) -> &mut [TxDescriptor; TX_DESC_SIZE] { + self.rt_desc_page.get_ptr().add(0x200).cast().as_mut() } } @@ -425,12 +398,8 @@ impl Drop for E1000eDev { fn drop(&mut self) { assert_eq!(self.status, netdev::LinkStatus::Down); - if let Some(_) = self.rx_buffers.take() {} - - // TODO: we should wait until all packets are sent - if let Some(_) = self.tx_buffers.take() {} - - let _ = self.rt_desc_page; + // TODO: we should wait until all packets are sent before dropping + // tx buffers. } } diff --git a/src/driver/virtio/virtio_blk.rs b/src/driver/virtio/virtio_blk.rs index 86b500b6..c5a3c3d2 100644 --- a/src/driver/virtio/virtio_blk.rs +++ b/src/driver/virtio/virtio_blk.rs @@ -1,21 +1,19 @@ -use crate::{ - io::Chunks, - kernel::{ - block::{BlockDeviceRequest, BlockRequestQueue}, - constants::EIO, - mem::{AsMemoryBlock, Page}, - }, - prelude::KResult, -}; use alloc::boxed::Box; + use async_trait::async_trait; use eonix_hal::mm::ArchPhysAccess; -use eonix_mm::{ - address::{Addr, PAddr, PhysAccess}, - paging::PFN, -}; +use eonix_mm::address::{Addr, PAddr, PhysAccess}; +use eonix_mm::paging::PFN; use eonix_sync::Spin; -use virtio_drivers::{device::blk::VirtIOBlk, transport::Transport, Hal}; +use virtio_drivers::device::blk::VirtIOBlk; +use virtio_drivers::transport::Transport; +use virtio_drivers::Hal; + +use crate::io::Chunks; +use crate::kernel::block::{BlockDeviceRequest, BlockRequestQueue}; +use crate::kernel::constants::EIO; +use crate::kernel::mem::{Page, PageExt}; +use crate::prelude::KResult; pub struct HAL; @@ -26,11 +24,10 @@ unsafe impl Hal for HAL { ) -> (virtio_drivers::PhysAddr, core::ptr::NonNull) { let page = Page::alloc_at_least(pages); - let paddr = page.start().addr(); - let ptr = page.as_memblk().as_byte_ptr(); - page.into_raw(); + let ptr = page.get_ptr(); + let pfn = page.into_raw(); - (paddr, ptr) + (PAddr::from(pfn).addr(), ptr) } unsafe fn dma_dealloc( @@ -93,15 +90,14 @@ where buffer, } => { let mut dev = self.lock(); - for ((start, len), buffer_page) in + for ((start, sectors), buffer_page) in Chunks::new(sector as usize, count as usize, 8).zip(buffer.iter()) { - let buffer = unsafe { - // SAFETY: Pages in `req.buffer` are guaranteed to be exclusively owned by us. - &buffer_page.as_memblk().as_bytes()[..len as usize * 512] - }; + let len = sectors * 512; + let pg = buffer_page.lock(); - dev.write_blocks(start, buffer).map_err(|_| EIO)?; + dev.write_blocks(start, &pg.as_bytes()[..len]) + .map_err(|_| EIO)?; } } BlockDeviceRequest::Read { @@ -110,15 +106,14 @@ where buffer, } => { let mut dev = self.lock(); - for ((start, len), buffer_page) in + for ((start, sectors), buffer_page) in Chunks::new(sector as usize, count as usize, 8).zip(buffer.iter()) { - let buffer = unsafe { - // SAFETY: Pages in `req.buffer` are guaranteed to be exclusively owned by us. 
- &mut buffer_page.as_memblk().as_bytes_mut()[..len as usize * 512] - }; + let len = sectors * 512; + let mut pg = buffer_page.lock(); - dev.read_blocks(start, buffer).map_err(|_| EIO)?; + dev.read_blocks(start, &mut pg.as_bytes_mut()[..len]) + .map_err(|_| EIO)?; } } } diff --git a/src/fs/fat32.rs b/src/fs/fat32.rs index 1104337d..9a4e03ec 100644 --- a/src/fs/fat32.rs +++ b/src/fs/fat32.rs @@ -1,45 +1,38 @@ mod dir; mod file; +use alloc::sync::{Arc, Weak}; use core::future::Future; use core::ops::Deref; -use alloc::sync::{Arc, Weak}; use async_trait::async_trait; use dir::{as_raw_dirents, ParseDirent}; use eonix_sync::RwLock; use itertools::Itertools; +use crate::io::{Buffer, ByteBuffer, UninitBuffer}; +use crate::kernel::block::{BlockDevice, BlockDeviceRequest}; use crate::kernel::constants::{EINVAL, EIO}; -use crate::kernel::mem::{AsMemoryBlock, CachePageStream}; +use crate::kernel::mem::{ + CachePage, CachePageStream, Page, PageCache, PageCacheBackendOps, PageExcl, PageExt, +}; use crate::kernel::timer::Instant; -use crate::kernel::vfs::inode::{InodeDirOps, InodeFileOps, InodeInfo, InodeOps, InodeUse}; +use crate::kernel::vfs::dentry::Dentry; +use crate::kernel::vfs::inode::{ + Ino, Inode, InodeDirOps, InodeFileOps, InodeInfo, InodeOps, InodeUse, +}; +use crate::kernel::vfs::mount::{register_filesystem, Mount, MountCreator}; use crate::kernel::vfs::types::{DeviceId, Format, Permission}; use crate::kernel::vfs::{SbRef, SbUse, SuperBlock, SuperBlockInfo}; use crate::prelude::*; -use crate::{ - io::{Buffer, ByteBuffer, UninitBuffer}, - kernel::{ - block::{BlockDevice, BlockDeviceRequest}, - mem::{ - paging::Page, - {CachePage, PageCache, PageCacheBackendOps}, - }, - vfs::{ - dentry::Dentry, - inode::{Ino, Inode}, - mount::{register_filesystem, Mount, MountCreator}, - }, - }, - KResult, -}; +use crate::KResult; #[repr(transparent)] #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] struct Cluster(u32); #[repr(transparent)] -#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] struct RawCluster(pub u32); impl RawCluster { @@ -70,7 +63,7 @@ impl Cluster { const SECTOR_SIZE: usize = 512; -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Debug)] #[repr(C, packed)] struct Bootsector { jmp: [u8; 3], @@ -302,10 +295,8 @@ impl InodeFileOps for FileInode { for cluster in cluster_iter { fs.read_cluster(cluster, &buffer_page).await?; - let data = unsafe { - // SAFETY: We are the only one holding this page. - &buffer_page.as_memblk().as_bytes()[inner_offset..] - }; + let pg = buffer_page.lock(); + let data = &pg.as_bytes()[inner_offset..]; let end = offset + data.len(); let real_end = end.min(self.info.lock().size as usize); @@ -340,7 +331,7 @@ struct DirInode { sb: SbRef, // TODO: Use the new PageCache... 
- dir_pages: RwLock>, + dir_pages: RwLock>, } impl DirInode { @@ -375,7 +366,7 @@ impl DirInode { let clusters = ClusterIterator::new(fat.as_ref(), self.cluster); for cluster in clusters { - let page = Page::alloc(); + let page = PageExcl::alloc(); fs.read_cluster(cluster, &page).await?; dir_pages.push(page); @@ -384,7 +375,7 @@ impl DirInode { Ok(()) } - async fn get_dir_pages(&self) -> KResult> + use<'_>> { + async fn get_dir_pages(&self) -> KResult> + use<'_>> { { let dir_pages = self.dir_pages.read().await; if !dir_pages.is_empty() { @@ -432,12 +423,7 @@ impl InodeDirOps for DirInode { let sb = self.sb.get()?; let dir_pages = self.get_dir_pages().await?; - let dir_data = dir_pages.iter().map(|page| { - unsafe { - // SAFETY: No one could be writing to it. - page.as_memblk().as_bytes() - } - }); + let dir_data = dir_pages.iter().map(|pg| pg.as_bytes()); let raw_dirents = dir_data .map(as_raw_dirents) @@ -481,12 +467,10 @@ impl InodeDirOps for DirInode { let inner_offset = offset % cluster_size; let inner_raw_dirent_offset = inner_offset / core::mem::size_of::(); - let dir_data = dir_pages.iter().skip(cluster_offset).map(|page| { - unsafe { - // SAFETY: No one could be writing to it. - page.as_memblk().as_bytes() - } - }); + let dir_data = dir_pages + .iter() + .skip(cluster_offset) + .map(|pg| pg.as_bytes()); let raw_dirents = dir_data .map(as_raw_dirents) diff --git a/src/io.rs b/src/io.rs index 85675dea..d7094f6d 100644 --- a/src/io.rs +++ b/src/io.rs @@ -1,6 +1,8 @@ +use core::mem::MaybeUninit; +use core::ops::{Add, AddAssign, Sub}; + use crate::kernel::constants::EFAULT; use crate::prelude::*; -use core::{cmp, mem::MaybeUninit}; #[must_use] #[derive(Debug)] @@ -236,18 +238,26 @@ impl Buffer for ByteBuffer<'_> { } } +pub trait Integer: + Add + Sub + AddAssign + Copy + PartialOrd + Ord +{ +} + +impl Integer for u64 {} +impl Integer for usize {} + /// Iterator that generates chunks of a given length from a start index /// until the end of the total length. /// /// The iterator returns a tuple of (start, len) for each chunk. 
-pub struct Chunks { - end: usize, - cur: usize, - chunk_len: usize, +pub struct Chunks { + end: T, + cur: T, + chunk_len: T, } -impl Chunks { - pub const fn new(start: usize, total_len: usize, chunk_len: usize) -> Self { +impl Chunks { + pub fn new(start: T, total_len: T, chunk_len: T) -> Self { Self { end: start + total_len, cur: start, @@ -256,8 +266,8 @@ impl Chunks { } } -impl Iterator for Chunks { - type Item = (usize, usize); +impl Iterator for Chunks { + type Item = (T, T); fn next(&mut self) -> Option { if self.cur >= self.end { @@ -265,7 +275,7 @@ impl Iterator for Chunks { } let start = self.cur; - let len = cmp::min(self.chunk_len, self.end - start); + let len = self.chunk_len.min(self.end - start); self.cur += self.chunk_len; Some((start, len)) diff --git a/src/kernel/block.rs b/src/kernel/block.rs index 3e4b65d1..8e017336 100644 --- a/src/kernel/block.rs +++ b/src/kernel/block.rs @@ -1,23 +1,20 @@ mod mbr; -use super::{ - constants::ENOENT, - mem::{paging::Page, AsMemoryBlock as _}, - vfs::types::DeviceId, -}; -use crate::kernel::constants::{EEXIST, EINVAL}; -use crate::{ - io::{Buffer, FillResult}, - prelude::*, -}; -use alloc::{ - collections::btree_map::{BTreeMap, Entry}, - sync::Arc, -}; -use async_trait::async_trait; +use alloc::collections::btree_map::{BTreeMap, Entry}; +use alloc::sync::Arc; use core::cmp::Ordering; + +use async_trait::async_trait; use mbr::MBRPartTable; +use super::constants::ENOENT; +use super::mem::paging::Page; +use super::mem::PageExt; +use super::vfs::types::DeviceId; +use crate::io::{Buffer, Chunks, FillResult}; +use crate::kernel::constants::{EEXIST, EINVAL}; +use crate::prelude::*; + pub struct Partition { pub lba_offset: u64, pub sector_count: u64, @@ -193,177 +190,72 @@ impl BlockDevice { /// `offset` - offset in bytes /// pub async fn read_some(&self, offset: usize, buffer: &mut dyn Buffer) -> KResult { - let mut sector_start = offset as u64 / 512; - let mut first_sector_offset = offset as u64 % 512; - let mut sector_count = (first_sector_offset + buffer.total() as u64 + 511) / 512; - - let mut nfilled = 0; - 'outer: while sector_count != 0 { - let pages: &[Page]; - let page: Option; - let page_vec: Option>; - - let nread; - - match sector_count { - count if count <= 8 => { - nread = count; - - let _page = Page::alloc(); - page = Some(_page); - pages = core::slice::from_ref(page.as_ref().unwrap()); + let sector_start = offset as u64 / 512; + let mut first_sector_offset = offset % 512; + let nr_sectors = (first_sector_offset + buffer.total() + 511) / 512; + + let nr_sectors_per_batch = self.queue().max_request_pages() / 2 * 2 * 8; + + let mut nr_filled = 0; + for (start, nr_batch) in Chunks::new(sector_start, nr_sectors as u64, nr_sectors_per_batch) + { + let (page_slice, page, mut page_vec); + match nr_batch { + ..=8 => { + page = Page::alloc(); + page_slice = core::slice::from_ref(&page); } - count if count <= 16 => { - nread = count; - - let _pages = Page::alloc_order(1); - page = Some(_pages); - pages = core::slice::from_ref(page.as_ref().unwrap()); + ..=16 => { + page = Page::alloc_order(1); + page_slice = core::slice::from_ref(&page); + } + ..=32 => { + page = Page::alloc_order(2); + page_slice = core::slice::from_ref(&page); } count => { - nread = count.min(self.queue().max_request_pages()); + let nr_huge_pages = count as usize / 32; + let nr_small_pages = ((count as usize % 32) + 7) / 8; - let npages = (nread + 15) / 16; - let mut _page_vec = Vec::with_capacity(npages as usize); - for _ in 0..npages { - 
_page_vec.push(Page::alloc_order(1)); - } - page_vec = Some(_page_vec); - pages = page_vec.as_ref().unwrap().as_slice(); + let nr_pages = nr_huge_pages + nr_small_pages; + page_vec = Vec::with_capacity(nr_pages); + + page_vec.resize_with(nr_huge_pages, || Page::alloc_order(2)); + page_vec.resize_with(nr_pages, || Page::alloc()); + page_slice = &page_vec; } } let req = BlockDeviceRequest::Read { - sector: sector_start, - count: nread, - buffer: &pages, + sector: start, + count: nr_batch, + buffer: page_slice, }; self.commit_request(req).await?; - for page in pages.iter() { - // SAFETY: We are the only owner of the page so no one could be mutating it. - let data = unsafe { &page.as_memblk().as_bytes()[first_sector_offset as usize..] }; + for page in page_slice { + let pg = page.lock(); + let data = &pg.as_bytes()[first_sector_offset..]; first_sector_offset = 0; - match buffer.fill(data)? { - FillResult::Done(n) => nfilled += n, - FillResult::Partial(n) => { - nfilled += n; - break 'outer; - } - FillResult::Full => { - break 'outer; - } - } - } - - sector_start += nread; - sector_count -= nread; - } - - if nfilled == buffer.total() { - Ok(FillResult::Done(nfilled)) - } else { - Ok(FillResult::Partial(nfilled)) - } - } - - /// Write some data to the block device, may involve some copy and fragmentation - /// - /// # Arguments - /// `offset` - offset in bytes - /// `data` - data to write - /// - pub async fn write_some(&self, offset: usize, data: &[u8]) -> KResult { - let mut sector_start = offset as u64 / 512; - let mut first_sector_offset = offset as u64 % 512; - let mut remaining_data = data; - let mut nwritten = 0; - - while !remaining_data.is_empty() { - let pages: &[Page]; - let page: Option; - let page_vec: Option>; - - // Calculate sectors needed for this write - let write_end = first_sector_offset + remaining_data.len() as u64; - let sector_count = ((write_end + 511) / 512).min(self.queue().max_request_pages()); - - match sector_count { - count if count <= 8 => { - let _page = Page::alloc(); - page = Some(_page); - pages = core::slice::from_ref(page.as_ref().unwrap()); - } - count if count <= 16 => { - let _pages = Page::alloc_order(1); - page = Some(_pages); - pages = core::slice::from_ref(page.as_ref().unwrap()); - } - count => { - let npages = (count + 15) / 16; - let mut _page_vec = Vec::with_capacity(npages as usize); - for _ in 0..npages { - _page_vec.push(Page::alloc_order(1)); - } - page_vec = Some(_page_vec); - pages = page_vec.as_ref().unwrap().as_slice(); - } - } - - if first_sector_offset != 0 || remaining_data.len() < (sector_count * 512) as usize { - let read_req = BlockDeviceRequest::Read { - sector: sector_start, - count: sector_count, - buffer: pages, - }; - self.commit_request(read_req).await?; - } - - let mut data_offset = 0; - let mut page_offset = first_sector_offset as usize; + nr_filled += buffer.fill(data)?.allow_partial(); - for page in pages.iter() { - // SAFETY: We own the page and can modify it - let page_data = unsafe { - let memblk = page.as_memblk(); - core::slice::from_raw_parts_mut(memblk.addr().get() as *mut u8, memblk.len()) - }; - - let copy_len = - (remaining_data.len() - data_offset).min(page_data.len() - page_offset); - - if copy_len == 0 { - break; - } - - page_data[page_offset..page_offset + copy_len] - .copy_from_slice(&remaining_data[data_offset..data_offset + copy_len]); - - data_offset += copy_len; - page_offset = 0; // Only first page has offset - - if data_offset >= remaining_data.len() { + if buffer.available() == 0 { break; } } - let 
write_req = BlockDeviceRequest::Write { - sector: sector_start, - count: sector_count, - buffer: pages, - }; - self.commit_request(write_req).await?; - - let bytes_written = data_offset; - nwritten += bytes_written; - remaining_data = &remaining_data[bytes_written..]; - sector_start += sector_count; - first_sector_offset = 0; + if buffer.available() == 0 { + break; + } } - Ok(nwritten) + if buffer.available() == 0 { + Ok(FillResult::Done(nr_filled)) + } else { + Ok(FillResult::Partial(nr_filled)) + } } } diff --git a/src/kernel/mem.rs b/src/kernel/mem.rs index c147306e..bfc826bf 100644 --- a/src/kernel/mem.rs +++ b/src/kernel/mem.rs @@ -8,9 +8,9 @@ mod mm_list; mod page_alloc; mod page_cache; -pub use access::{AsMemoryBlock, MemoryBlock, PhysAccess}; +pub use access::PhysAccess; pub(self) use mm_area::MMArea; pub use mm_list::{handle_kernel_page_fault, FileMapping, MMList, Mapping, Permission}; pub use page_alloc::{GlobalPageAlloc, RawPage}; pub use page_cache::{CachePage, CachePageStream, PageCache, PageCacheBackendOps}; -pub use paging::{Page, PageBuffer}; +pub use paging::{Page, PageBuffer, PageExcl, PageExt}; diff --git a/src/kernel/mem/access.rs b/src/kernel/mem/access.rs index ce525a0a..328dcfbd 100644 --- a/src/kernel/mem/access.rs +++ b/src/kernel/mem/access.rs @@ -1,22 +1,7 @@ -use core::{num::NonZero, ptr::NonNull}; +use core::ptr::NonNull; use eonix_hal::mm::ArchPhysAccess; use eonix_mm::address::{PAddr, PhysAccess as _PhysAccess}; -/// A block of memory starting at a non-zero address and having a specific length. -/// -/// This struct is used to represent a memory block that can be accessed -/// in the kernel space. -pub struct MemoryBlock { - addr: NonZero, - len: usize, -} - -pub trait AsMemoryBlock { - /// Translate the physical page the page object pointing to into kernel - /// accessible pointer. Use it with care. - fn as_memblk(&self) -> MemoryBlock; -} - pub trait PhysAccess { /// Translate the data that this address is pointing to into kernel /// accessible pointer. Use it with care. @@ -30,107 +15,6 @@ pub trait PhysAccess { unsafe fn as_ptr(&self) -> NonNull; } -impl MemoryBlock { - /// Create a new `MemoryBlock` with the given address and length. - /// - /// # Safety - /// The caller must ensure that the address is valid. - /// Otherwise, it may lead to undefined behavior. - pub unsafe fn new(addr: NonZero, len: usize) -> Self { - Self { addr, len } - } - - /// Get the start address of the memory block. - #[allow(dead_code)] - pub fn addr(&self) -> NonZero { - self.addr - } - - /// Get the length of the memory block. - #[allow(dead_code)] - pub fn len(&self) -> usize { - self.len - } - - /// Split the memory block into two parts at the given offset. - pub fn split_at(&self, at: usize) -> (Self, Self) { - if at > self.len { - panic!("Out of bounds"); - } - - let rhs_start = self.addr.checked_add(at).expect("Overflow"); - - let lhs = unsafe { Self::new(self.addr, at) }; - let rhs = unsafe { Self::new(rhs_start, self.len - at) }; - - (lhs, rhs) - } - - /// Provide a pointer to the data. - /// - /// # Safety - /// Using the returned pointer is undefined behavior if the address is not - /// properly aligned or the size is not equal to the size of `T`. - pub unsafe fn as_ptr_unchecked(&self) -> NonNull { - // SAFETY: `self.addr` is a non-zero value. - NonNull::new_unchecked(self.addr.get() as *mut T) - } - - /// Provide a pointer to the data. - /// - /// # Panic - /// Panic if the address is not properly aligned. 
- pub fn as_ptr(&self) -> NonNull { - let alignment = align_of::(); - - if self.addr.get() % alignment != 0 { - panic!("Alignment error"); - } - - unsafe { - // SAFETY: We've checked that `self.addr` is properly aligned. - self.as_ptr_unchecked() - } - } - - /// Provide a pointer to the bytes. - pub fn as_byte_ptr(&self) -> NonNull { - unsafe { - // SAFETY: No alignment check is needed for bytes. - self.as_ptr_unchecked() - } - } - - /// Provide immutable access to the data it pointed to. - /// - /// # Safety - /// This function is unsafe because it returns an immutable reference with - /// a created lifetime. - /// - /// The caller must ensure that the data has no other mutable aliases while - /// the reference is in use. Otherwise, it may lead to undefined behavior. - pub unsafe fn as_bytes<'a>(&self) -> &'a [u8] { - core::slice::from_raw_parts(self.as_ptr_unchecked().as_ptr(), self.len) - } - - /// Provide mutable access to the data it pointed to. - /// - /// # Panic - /// Panic if the address is not properly aligned or the size is not - /// equal to the size of `T`. - /// - /// # Safety - /// This function is unsafe because it returns a mutable reference with a - /// created lifetime. - /// - /// The caller must ensure that the data has no other immutable or mutable - /// aliases while the reference is in use. - /// Otherwise, it may lead to undefined behavior. - pub unsafe fn as_bytes_mut<'a>(&mut self) -> &'a mut [u8] { - core::slice::from_raw_parts_mut(self.as_ptr_unchecked().as_ptr(), self.len) - } -} - impl PhysAccess for PAddr { unsafe fn as_ptr(&self) -> NonNull { ArchPhysAccess::as_ptr(*self) diff --git a/src/kernel/mem/allocator.rs b/src/kernel/mem/allocator.rs index 36b19612..9e5df69b 100644 --- a/src/kernel/mem/allocator.rs +++ b/src/kernel/mem/allocator.rs @@ -1,13 +1,15 @@ -use super::page_alloc::RawPagePtr; -use super::{AsMemoryBlock, GlobalPageAlloc, Page}; use core::alloc::{GlobalAlloc, Layout}; use core::ptr::NonNull; + use eonix_hal::mm::ArchPhysAccess; use eonix_mm::address::PhysAccess; use eonix_mm::paging::{PAGE_SIZE_BITS, PFN}; use eonix_sync::LazyLock; use slab_allocator::SlabAllocator; +use super::page_alloc::RawPagePtr; +use super::{GlobalPageAlloc, Page, PageExt}; + static SLAB_ALLOCATOR: LazyLock> = LazyLock::new(|| SlabAllocator::new_in(GlobalPageAlloc)); @@ -23,7 +25,7 @@ unsafe impl GlobalAlloc for Allocator { let page_count = size >> PAGE_SIZE_BITS; let page = Page::alloc_at_least(page_count); - let ptr = page.as_memblk().as_ptr(); + let ptr = page.get_ptr(); page.into_raw(); ptr.as_ptr() diff --git a/src/kernel/mem/mm_area.rs b/src/kernel/mem/mm_area.rs index 731c5303..dcbeeb63 100644 --- a/src/kernel/mem/mm_area.rs +++ b/src/kernel/mem/mm_area.rs @@ -1,15 +1,17 @@ -use super::mm_list::EMPTY_PAGE; -use super::paging::AllocZeroed as _; -use super::{AsMemoryBlock, Mapping, Page, Permission}; -use crate::kernel::constants::EINVAL; -use crate::prelude::KResult; use core::borrow::Borrow; use core::cell::UnsafeCell; use core::cmp; + use eonix_mm::address::{AddrOps as _, VAddr, VRange}; use eonix_mm::page_table::{PageAttribute, RawAttribute, PTE}; use eonix_mm::paging::{PAGE_SIZE, PFN}; +use super::mm_list::EMPTY_PAGE; +use super::{Mapping, Page, Permission}; +use crate::kernel::constants::EINVAL; +use crate::kernel::mem::{PageExcl, PageExt}; +use crate::prelude::KResult; + #[derive(Debug)] pub struct MMArea { range: UnsafeCell, @@ -105,25 +107,23 @@ impl MMArea { return; } - let new_page; + let mut new_page; if *pfn == EMPTY_PAGE.pfn() { - new_page = 
Page::zeroed(); + new_page = PageExcl::zeroed(); } else { - new_page = Page::alloc(); + new_page = PageExcl::alloc(); unsafe { // SAFETY: `page` is CoW, which means that others won't write to it. - let old_page_data = page.as_memblk().as_bytes(); - - // SAFETY: `new_page` is exclusive owned by us. - let new_page_data = new_page.as_memblk().as_bytes_mut(); + let old_page_data = page.get_bytes_ptr().as_ref(); + let new_page_data = new_page.as_bytes_mut(); new_page_data.copy_from_slice(old_page_data); }; } attr.remove(PageAttribute::ACCESSED); - *pfn = new_page.into_raw(); + *pfn = new_page.into_page().into_raw(); } /// # Arguments @@ -156,13 +156,12 @@ impl MMArea { // Bss is embarrassing in pagecache! // We have to assume cnt_to_read < PAGE_SIZE all bss if cnt_to_read < PAGE_SIZE { - let new_page = Page::zeroed(); - unsafe { - let page_data = new_page.as_memblk().as_bytes_mut(); - page_data[..cnt_to_read] - .copy_from_slice(&page.as_memblk().as_bytes()[..cnt_to_read]); - } - *pfn = new_page.into_raw(); + let mut new_page = PageExcl::zeroed(); + + new_page.as_bytes_mut()[..cnt_to_read] + .copy_from_slice(&page.lock().as_bytes()[..cnt_to_read]); + + *pfn = new_page.into_page().into_raw(); } else { *pfn = page.clone().into_raw(); } @@ -182,13 +181,12 @@ impl MMArea { cache_page.set_dirty(); *pfn = page.clone().into_raw(); } else { - let new_page = Page::zeroed(); - unsafe { - let page_data = new_page.as_memblk().as_bytes_mut(); - page_data[..cnt_to_read] - .copy_from_slice(&page.as_memblk().as_bytes()[..cnt_to_read]); - } - *pfn = new_page.into_raw(); + let mut new_page = PageExcl::zeroed(); + + new_page.as_bytes_mut()[..cnt_to_read] + .copy_from_slice(&page.lock().as_bytes()[..cnt_to_read]); + + *pfn = new_page.into_page().into_raw(); } attr.insert(PageAttribute::WRITE); diff --git a/src/kernel/mem/mm_list.rs b/src/kernel/mem/mm_list.rs index ad1e45c2..17dc1b05 100644 --- a/src/kernel/mem/mm_list.rs +++ b/src/kernel/mem/mm_list.rs @@ -1,33 +1,30 @@ mod mapping; mod page_fault; -use super::address::{VAddrExt as _, VRangeExt as _}; -use super::page_alloc::GlobalPageAlloc; -use super::paging::AllocZeroed as _; -use super::{AsMemoryBlock, MMArea, Page}; -use crate::kernel::constants::{EEXIST, EFAULT, EINVAL, ENOMEM}; -use crate::kernel::mem::page_alloc::RawPagePtr; -use crate::{prelude::*, sync::ArcSwap}; use alloc::collections::btree_set::BTreeSet; use core::fmt; use core::sync::atomic::{AtomicUsize, Ordering}; + use eonix_hal::mm::{ flush_tlb_all, get_root_page_table_pfn, set_root_page_table_pfn, ArchPagingMode, ArchPhysAccess, GLOBAL_PAGE_TABLE, }; -use eonix_mm::address::{Addr as _, PAddr}; -use eonix_mm::page_table::PageAttribute; -use eonix_mm::paging::PFN; -use eonix_mm::{ - address::{AddrOps as _, VAddr, VRange}, - page_table::{PageTable, RawAttribute, PTE}, - paging::PAGE_SIZE, -}; +use eonix_mm::address::{Addr as _, AddrOps as _, PAddr, VAddr, VRange}; +use eonix_mm::page_table::{PageAttribute, PageTable, RawAttribute, PTE}; +use eonix_mm::paging::{PAGE_SIZE, PFN}; use eonix_sync::{LazyLock, Mutex}; - pub use mapping::{FileMapping, Mapping}; pub use page_fault::handle_kernel_page_fault; +use super::address::{VAddrExt as _, VRangeExt as _}; +use super::page_alloc::GlobalPageAlloc; +use super::paging::AllocZeroed as _; +use super::{MMArea, Page, PageExt}; +use crate::kernel::constants::{EEXIST, EFAULT, EINVAL, ENOMEM}; +use crate::kernel::mem::page_alloc::RawPagePtr; +use crate::prelude::*; +use crate::sync::ArcSwap; + pub static EMPTY_PAGE: LazyLock = LazyLock::new(|| Page::zeroed()); 
#[derive(Debug, Clone, Copy)] @@ -697,12 +694,10 @@ impl MMList { unsafe { // SAFETY: We are sure that the page is valid and we have the right to access it. Page::with_raw(pte.get_pfn(), |page| { - // SAFETY: The caller guarantees that no one else is using the page. - let page_data = page.as_memblk().as_bytes_mut(); - func( - offset + idx * 0x1000, - &mut page_data[start_offset..end_offset], - ); + let mut pg = page.lock(); + let page_data = &mut pg.as_bytes_mut()[start_offset..end_offset]; + + func(offset + idx * 0x1000, page_data); }); } } diff --git a/src/kernel/mem/page_alloc/raw_page.rs b/src/kernel/mem/page_alloc/raw_page.rs index 54d4d590..08536693 100644 --- a/src/kernel/mem/page_alloc/raw_page.rs +++ b/src/kernel/mem/page_alloc/raw_page.rs @@ -81,7 +81,7 @@ pub struct RawPagePtr(NonNull); impl PageFlags { pub const PRESENT: u32 = 1 << 0; - // pub const LOCKED: u32 = 1 << 1; + pub const LOCKED: u32 = 1 << 1; pub const BUDDY: u32 = 1 << 2; pub const SLAB: u32 = 1 << 3; pub const DIRTY: u32 = 1 << 4; @@ -99,6 +99,13 @@ impl PageFlags { pub fn clear(&self, flag: u32) { self.0.fetch_and(!flag, Ordering::Relaxed); } + + /// Set the flag and return whether it was already set. + /// + /// If multiple flags are given, returns true if any of them were already set. + pub fn test_and_set(&self, flag: u32) -> bool { + (self.0.fetch_or(flag, Ordering::Relaxed) & flag) != 0 + } } impl RawPagePtr { diff --git a/src/kernel/mem/page_cache.rs b/src/kernel/mem/page_cache.rs index 9deb50cf..6a1c04ca 100644 --- a/src/kernel/mem/page_cache.rs +++ b/src/kernel/mem/page_cache.rs @@ -1,22 +1,23 @@ -use super::{paging::AllocZeroed, Page}; -use crate::{ - io::{Buffer, FillResult, Stream}, - kernel::mem::page_alloc::RawPagePtr, - prelude::KResult, - GlobalPageAlloc, -}; -use align_ext::AlignExt; use alloc::boxed::Box; -use alloc::{collections::btree_map::BTreeMap, sync::Weak}; +use alloc::collections::btree_map::BTreeMap; +use alloc::sync::Weak; +use core::future::Future; +use core::mem::ManuallyDrop; + +use align_ext::AlignExt; use async_trait::async_trait; -use core::{future::Future, mem::ManuallyDrop}; use eonix_hal::mm::ArchPhysAccess; -use eonix_mm::{ - address::{PAddr, PhysAccess}, - paging::{PageAlloc, RawPage, PAGE_SIZE, PAGE_SIZE_BITS, PFN}, -}; +use eonix_mm::address::{PAddr, PhysAccess}; +use eonix_mm::paging::{PageAlloc, RawPage, PAGE_SIZE, PAGE_SIZE_BITS, PFN}; use eonix_sync::Mutex; +use super::paging::AllocZeroed; +use super::Page; +use crate::io::{Buffer, FillResult, Stream}; +use crate::kernel::mem::page_alloc::RawPagePtr; +use crate::prelude::KResult; +use crate::GlobalPageAlloc; + pub struct PageCache { pages: Mutex>, backend: Weak, @@ -315,14 +316,6 @@ impl CachePageStream { pub fn new(page: CachePage) -> Self { Self { page, cur: 0 } } - - pub fn remaining(&self) -> usize { - self.page.valid_size().saturating_sub(self.cur) - } - - pub fn is_drained(&self) -> bool { - self.cur >= self.page.valid_size() - } } impl Stream for CachePageStream { diff --git a/src/kernel/mem/paging.rs b/src/kernel/mem/paging.rs index 8c5f41f2..bca573fb 100644 --- a/src/kernel/mem/paging.rs +++ b/src/kernel/mem/paging.rs @@ -1,41 +1,54 @@ -use super::{access::AsMemoryBlock, page_alloc::GlobalPageAlloc, MemoryBlock, PhysAccess}; +use core::ops::Deref; +use core::ptr::NonNull; + +use eonix_mm::paging::Page as GenericPage; + +use super::page_alloc::GlobalPageAlloc; +use super::PhysAccess; use crate::io::{Buffer, FillResult}; -use eonix_mm::paging::{Page as GenericPage, PageAlloc}; pub type Page = GenericPage; /// A 
buffer that wraps a page and provides a `Buffer` interface. pub struct PageBuffer { - page: Page, + page: PageExcl, offset: usize, } +pub struct PageLocked<'a> { + page: &'a Page, +} + +/// A page that is exclusively owned. +#[repr(transparent)] +pub struct PageExcl(Page); + pub trait AllocZeroed { fn zeroed() -> Self; } -impl AsMemoryBlock for GenericPage { - fn as_memblk(&self) -> MemoryBlock { - unsafe { - // SAFETY: `self.start()` points to valid memory of length `self.len()`. - MemoryBlock::new(self.start().as_ptr::<()>().addr(), self.len()) - } +pub trait PageExt { + fn lock(&self) -> PageLocked; + + /// Get a vmem pointer to the page data as a byte slice. + fn get_bytes_ptr(&self) -> NonNull<[u8]>; + + /// Get a vmem pointer to the start of the page. + fn get_ptr(&self) -> NonNull { + self.get_bytes_ptr().cast() } } impl PageBuffer { pub fn new() -> Self { Self { - page: Page::alloc(), + page: PageExcl::alloc(), offset: 0, } } pub fn all(&self) -> &[u8] { - unsafe { - // SAFETY: The page is exclusivly owned by us. - self.page.as_memblk().as_bytes() - } + self.page.as_bytes() } pub fn data(&self) -> &[u8] { @@ -43,10 +56,7 @@ impl PageBuffer { } pub fn available_mut(&mut self) -> &mut [u8] { - unsafe { - // SAFETY: The page is exclusivly owned by us. - &mut self.page.as_memblk().as_bytes_mut()[self.offset..] - } + &mut self.page.as_bytes_mut()[self.offset..] } } @@ -80,10 +90,87 @@ impl Buffer for PageBuffer { impl AllocZeroed for Page { fn zeroed() -> Self { let page = Self::alloc(); + + page.lock().as_bytes_mut().fill(0); + + page + } +} + +impl PageExt for Page { + fn lock(&self) -> PageLocked { + // TODO: Actually perform the lock. + PageLocked { page: self } + } + + fn get_bytes_ptr(&self) -> NonNull<[u8]> { unsafe { - // SAFETY: The page is exclusivly owned by us. - page.as_memblk().as_bytes_mut().fill(0); + // SAFETY: `self.start()` can't be null. + NonNull::slice_from_raw_parts(self.start().as_ptr(), self.len()) } - page + } +} + +impl PageLocked<'_> { + pub fn as_bytes(&self) -> &[u8] { + unsafe { + // SAFETY: `self.start()` points to valid memory of length `self.len()`. + core::slice::from_raw_parts(self.start().as_ptr().as_ptr(), self.len()) + } + } + + pub fn as_bytes_mut(&mut self) -> &mut [u8] { + unsafe { + // SAFETY: `self.start()` points to valid memory of length `self.len()`. + core::slice::from_raw_parts_mut(self.start().as_ptr().as_ptr(), self.len()) + } + } +} + +impl Deref for PageLocked<'_> { + type Target = Page; + + fn deref(&self) -> &Self::Target { + self.page + } +} + +impl PageExcl { + pub fn alloc() -> Self { + Self(Page::alloc()) + } + + pub fn alloc_order(order: u32) -> Self { + Self(Page::alloc_order(order)) + } + + pub fn zeroed() -> Self { + Self(Page::zeroed()) + } + + pub fn as_bytes(&self) -> &[u8] { + unsafe { + // SAFETY: The page is exclusively owned by us. + self.get_bytes_ptr().as_ref() + } + } + + pub fn as_bytes_mut(&mut self) -> &mut [u8] { + unsafe { + // SAFETY: The page is exclusively owned by us. 
+ self.get_bytes_ptr().as_mut() + } + } + + pub fn into_page(self) -> Page { + self.0 + } +} + +impl Deref for PageExcl { + type Target = Page; + + fn deref(&self) -> &Self::Target { + &self.0 } } diff --git a/src/kernel/vfs/file/mod.rs b/src/kernel/vfs/file/mod.rs index bb1c66ec..eb00cc4c 100644 --- a/src/kernel/vfs/file/mod.rs +++ b/src/kernel/vfs/file/mod.rs @@ -2,29 +2,24 @@ mod inode_file; mod pipe; mod terminal_file; -use crate::{ - io::{Buffer, ByteBuffer, Chunks, IntoStream, Stream}, - kernel::{ - constants::{EBADF, EINTR, EINVAL, ENOTTY}, - mem::{AsMemoryBlock, Page}, - task::Thread, - CharDevice, - }, - prelude::KResult, -}; use alloc::sync::Arc; -use bitflags::bitflags; -use core::{ - ops::Deref, - sync::atomic::{AtomicI32, AtomicU32, Ordering}, -}; -use pipe::{PipeReadEnd, PipeWriteEnd}; -use posix_types::open::OpenFlags; +use core::ops::Deref; +use core::sync::atomic::{AtomicI32, AtomicU32, Ordering}; +use bitflags::bitflags; pub use inode_file::InodeFile; pub use pipe::Pipe; +use pipe::{PipeReadEnd, PipeWriteEnd}; +use posix_types::open::OpenFlags; pub use terminal_file::TerminalFile; +use crate::io::{Buffer, ByteBuffer, Chunks, IntoStream, Stream}; +use crate::kernel::constants::{EBADF, EINTR, EINVAL, ENOTTY}; +use crate::kernel::mem::PageExcl; +use crate::kernel::task::Thread; +use crate::kernel::CharDevice; +use crate::prelude::KResult; + pub enum FileType { Inode(InodeFile), PipeRead(PipeReadEnd), @@ -99,9 +94,8 @@ impl FileType { } pub async fn sendfile(&self, dest_file: &Self, count: usize) -> KResult { - let buffer_page = Page::alloc(); - // SAFETY: We are the only owner of the page. - let buffer = unsafe { buffer_page.as_memblk().as_bytes_mut() }; + let mut buffer_page = PageExcl::alloc(); + let buffer = buffer_page.as_bytes_mut(); self.sendfile_check()?; diff --git a/src/lib.rs b/src/lib.rs index 98e196f8..959cb29f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,6 +11,9 @@ extern crate alloc; +#[macro_use] +extern crate static_assertions; + #[cfg(any(target_arch = "riscv64", target_arch = "x86_64"))] extern crate unwinding; @@ -28,36 +31,33 @@ mod prelude; mod rcu; mod sync; -use crate::kernel::task::alloc_pid; -use alloc::{ffi::CString, sync::Arc}; -use core::{ - hint::spin_loop, - sync::atomic::{AtomicBool, AtomicUsize, Ordering}, -}; -use eonix_hal::{ - arch_exported::bootstrap::shutdown, - context::TaskContext, - processor::{halt, CPU, CPU_COUNT}, - traits::{context::RawTaskContext, trap::IrqState}, - trap::disable_irqs_save, -}; +use alloc::ffi::CString; +use alloc::sync::Arc; +use core::hint::spin_loop; +use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; + +use eonix_hal::arch_exported::bootstrap::shutdown; +use eonix_hal::context::TaskContext; +use eonix_hal::processor::{halt, CPU, CPU_COUNT}; +use eonix_hal::traits::context::RawTaskContext; +use eonix_hal::traits::trap::IrqState; +use eonix_hal::trap::disable_irqs_save; use eonix_mm::address::PRange; -use eonix_runtime::{executor::Stack, scheduler::RUNTIME}; -use kernel::{ - mem::GlobalPageAlloc, - task::{KernelStack, ProcessBuilder, ProcessList, ProgramLoader, ThreadBuilder}, - vfs::{ - dentry::Dentry, - mount::{do_mount, MS_NOATIME, MS_NODEV, MS_NOSUID, MS_RDONLY}, - types::Permission, - FsContext, - }, - CharDevice, -}; +use eonix_runtime::executor::Stack; +use eonix_runtime::scheduler::RUNTIME; +use kernel::mem::GlobalPageAlloc; +use kernel::task::{KernelStack, ProcessBuilder, ProcessList, ProgramLoader, ThreadBuilder}; +use kernel::vfs::dentry::Dentry; +use kernel::vfs::mount::{do_mount, 
MS_NOATIME, MS_NODEV, MS_NOSUID, MS_RDONLY};
+use kernel::vfs::types::Permission;
+use kernel::vfs::FsContext;
+use kernel::CharDevice;
use kernel_init::setup_memory;
use path::Path;
use prelude::*;
+use crate::kernel::task::alloc_pid;
+
#[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))]
fn do_panic() -> ! {
#[cfg(target_arch = "riscv64")]

From 922324322b1d0dd75d1b50494785504035d8d598 Mon Sep 17 00:00:00 2001
From: greatbridf
Date: Tue, 23 Dec 2025 00:12:42 +0800
Subject: [PATCH 06/25] tls: rework of arch's UserTLS design

Separate the old UserTLS into UserTLS and UserTLSDescriptor.

UserTLS is for threads to hold information about their storage.
Descriptors are used in clone syscalls.

Signed-off-by: greatbridf
---
Cargo.lock | 5 +-
Cargo.toml | 1 +
crates/eonix_hal/src/arch/riscv64/cpu.rs | 17 ---
crates/eonix_hal/src/lib.rs | 2 +-
.../posix_types/src/syscall_no/loongarch64.rs | 3 +-
crates/posix_types/src/syscall_no/riscv64.rs | 2 +-
src/kernel/interrupt.rs | 12 +-
src/kernel/pcie/init.rs | 18 +--
src/kernel/syscall/file_rw.rs | 67 +++++----
src/kernel/syscall/procops.rs | 134 +++++++-----------
src/kernel/task.rs | 21 ++-
src/kernel/task/clone.rs | 42 +++---
src/kernel/task/thread.rs | 76 ++++------
src/kernel/task/user_tls/mod.rs | 34 +++++
src/kernel/task/user_tls/x86_64.rs | 83 +++++++++++
src/kernel/user/dataflow.rs | 21 ++-
16 files changed, 304 insertions(+), 234 deletions(-)
create mode 100644 src/kernel/task/user_tls/mod.rs
create mode 100644 src/kernel/task/user_tls/x86_64.rs

diff --git a/Cargo.lock b/Cargo.lock
index c70190a0..5487f284 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -75,9 +75,9 @@
[[package]]
name = "cfg-if"
-version = "1.0.1"
+version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268"
+checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
[[package]]
name = "critical-section"
@@ -164,6 +164,7 @@ dependencies = [
"atomic_unique_refcell",
"bitflags",
"buddy_allocator",
+ "cfg-if",
"eonix_hal",
"eonix_log",
"eonix_macros",
diff --git a/Cargo.toml b/Cargo.toml
index dca5d34d..4fcb6f70 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -43,6 +43,7 @@ futures = { version = "0.3.31", features = [
"async-await",
], default-features = false }
static_assertions = "1.1.0"
+cfg-if = "1.0.4"
[target.'cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))'.dependencies]
virtio-drivers = { version = "0.11.0" }
diff --git a/crates/eonix_hal/src/arch/riscv64/cpu.rs b/crates/eonix_hal/src/arch/riscv64/cpu.rs
index 9c843eaf..3c58580e 100644
--- a/crates/eonix_hal/src/arch/riscv64/cpu.rs
+++ b/crates/eonix_hal/src/arch/riscv64/cpu.rs
@@ -27,22 +27,11 @@ static DEFAULT_TRAP_CONTEXT: MaybeUninit = MaybeUninit::uninit();
#[eonix_percpu::define_percpu]
static LOCAL_CPU: LazyLock = LazyLock::new(|| CPU::new(CPUID.get()));
-#[derive(Debug, Clone)]
-pub enum UserTLS {
- Base(u64),
-}
-
/// RISC-V Hart
pub struct CPU {
pub(crate) interrupt: InterruptControl,
}
-impl UserTLS {
- pub fn new(base: u64) -> Self {
- Self::Base(base)
- }
-}
-
impl CPU {
fn new(cpuid: usize) -> Self {
Self {
@@ -66,12 +55,6 @@ impl CPU {
sscratch::write(DEFAULT_TRAP_CONTEXT.as_ptr() as usize);
}
- pub unsafe fn load_interrupt_stack(self: Pin<&mut Self>, sp: u64) {}
-
- pub fn set_tls32(self: Pin<&mut Self>, _user_tls: &UserTLS) {
- // nothing
- }
-
pub fn local() -> PreemptGuard> {
unsafe {
// SAFETY: We pass the reference into a `PreemptGuard`, which ensures
diff
--git a/crates/eonix_hal/src/lib.rs b/crates/eonix_hal/src/lib.rs index e789ecbb..b9c7d053 100644 --- a/crates/eonix_hal/src/lib.rs +++ b/crates/eonix_hal/src/lib.rs @@ -19,7 +19,7 @@ pub mod fpu { } pub mod processor { - pub use crate::arch::cpu::{halt, UserTLS, CPU, CPU_COUNT}; + pub use crate::arch::cpu::{halt, CPU, CPU_COUNT}; } /// Re-export the arch module for use in other crates diff --git a/crates/posix_types/src/syscall_no/loongarch64.rs b/crates/posix_types/src/syscall_no/loongarch64.rs index 19776a65..b0d54689 100644 --- a/crates/posix_types/src/syscall_no/loongarch64.rs +++ b/crates/posix_types/src/syscall_no/loongarch64.rs @@ -136,7 +136,7 @@ pub const SYS_RT_SIGSUSPEND: usize = 133; pub const SYS_RT_SIGACTION: usize = 134; pub const SYS_RT_SIGPROCMASK: usize = 135; pub const SYS_RT_SIGPENDING: usize = 136; -pub const SYS_RT_SIGTIMEDWAIT_TIME32: usize = 137; +pub const SYS_RT_SIGTIMEDWAIT: usize = 137; pub const SYS_RT_SIGQUEUEINFO: usize = 138; pub const SYS_RT_SIGRETURN: usize = 139; pub const SYS_SETPRIORITY: usize = 140; @@ -295,7 +295,6 @@ pub const SYS_RECVMMSG: usize = 417; pub const SYS_MQ_TIMEDSEND: usize = 418; pub const SYS_MQ_TIMEDRECEIVE: usize = 419; pub const SYS_SEMTIMEDOP: usize = 420; -pub const SYS_RT_SIGTIMEDWAIT: usize = 421; pub const SYS_FUTEX: usize = 422; pub const SYS_SCHED_RR_GET_INTERVAL: usize = 423; pub const SYS_PIDFD_SEND_SIGNAL: usize = 424; diff --git a/crates/posix_types/src/syscall_no/riscv64.rs b/crates/posix_types/src/syscall_no/riscv64.rs index 4457c20e..076942e9 100644 --- a/crates/posix_types/src/syscall_no/riscv64.rs +++ b/crates/posix_types/src/syscall_no/riscv64.rs @@ -136,7 +136,7 @@ pub const SYS_RT_SIGSUSPEND: usize = 133; pub const SYS_RT_SIGACTION: usize = 134; pub const SYS_RT_SIGPROCMASK: usize = 135; pub const SYS_RT_SIGPENDING: usize = 136; -pub const SYS_RT_SIGTIMEDWAIT_TIME32: usize = 137; +pub const SYS_RT_SIGTIMEDWAIT: usize = 137; pub const SYS_RT_SIGQUEUEINFO: usize = 138; pub const SYS_RT_SIGRETURN: usize = 139; pub const SYS_SETPRIORITY: usize = 140; diff --git a/src/kernel/interrupt.rs b/src/kernel/interrupt.rs index 742727cb..2092bfcb 100644 --- a/src/kernel/interrupt.rs +++ b/src/kernel/interrupt.rs @@ -1,15 +1,17 @@ -use super::mem::handle_kernel_page_fault; -use super::task::block_on; -use super::timer::timer_interrupt; -use crate::kernel::constants::EINVAL; -use crate::prelude::*; use alloc::sync::Arc; + use eonix_hal::traits::fault::Fault; use eonix_hal::traits::trap::{RawTrapContext, TrapType}; use eonix_hal::trap::TrapContext; use eonix_mm::address::{Addr as _, VAddr}; use eonix_sync::SpinIrq as _; +use super::mem::handle_kernel_page_fault; +use super::task::block_on; +use super::timer::timer_interrupt; +use crate::kernel::constants::EINVAL; +use crate::prelude::*; + static IRQ_HANDLERS: Spin<[Vec>; 16]> = Spin::new([const { Vec::new() }; 16]); diff --git a/src/kernel/pcie/init.rs b/src/kernel/pcie/init.rs index c0253f4e..4c183bc5 100644 --- a/src/kernel/pcie/init.rs +++ b/src/kernel/pcie/init.rs @@ -1,13 +1,14 @@ -use super::{ - device::{PCIDevice, SegmentGroup, PCIE_DEVICES}, - error::PciError, -}; -use crate::kernel::{mem::PhysAccess as _, pcie::device::PciMemoryAllocator}; -use acpi::{AcpiHandler, PhysicalMapping}; use alloc::collections::btree_map::Entry; use alloc::vec; + +use acpi::{AcpiHandler, PhysicalMapping}; use eonix_log::println_trace; -use eonix_mm::address::PAddr; +use eonix_mm::address::{PAddr, PRange}; + +use super::device::{PCIDevice, SegmentGroup, PCIE_DEVICES}; +use 
super::error::PciError; +use crate::kernel::mem::PhysAccess as _; +use crate::kernel::pcie::device::PciMemoryAllocator; #[derive(Clone)] struct AcpiHandlerImpl; @@ -67,10 +68,11 @@ pub fn init_pcie() -> Result<(), PciError> { #[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))] { - use crate::kernel::constants::{EINVAL, EIO, ENOENT}; use eonix_hal::arch_exported::fdt::FDT; use eonix_mm::address::PRange; + use crate::kernel::constants::{EINVAL, EIO, ENOENT}; + let pcie_node = FDT .find_compatible(&["pci-host-ecam-generic"]) .ok_or(ENOENT)?; diff --git a/src/kernel/syscall/file_rw.rs b/src/kernel/syscall/file_rw.rs index db32b0e5..93a543d7 100644 --- a/src/kernel/syscall/file_rw.rs +++ b/src/kernel/syscall/file_rw.rs @@ -1,33 +1,31 @@ +use alloc::sync::Arc; +use core::time::Duration; + +use posix_types::ctypes::{Long, PtrT}; +use posix_types::namei::RenameFlags; +use posix_types::open::{AtFlags, OpenFlags}; +use posix_types::poll::FDSet; +use posix_types::signal::{SigSet, Signal}; +use posix_types::stat::{Stat, StatX, TimeSpec}; +use posix_types::syscall_no::*; + use super::{FromSyscallArg, User}; -use crate::io::IntoStream; +use crate::io::{Buffer, BufferFill, IntoStream}; use crate::kernel::constants::{ EBADF, EFAULT, EINVAL, ENOENT, ENOSYS, ENOTDIR, SEEK_CUR, SEEK_END, SEEK_SET, }; use crate::kernel::syscall::UserMut; use crate::kernel::task::Thread; use crate::kernel::timer::sleep; +use crate::kernel::user::{ + CheckedUserPointer, UserBuffer, UserPointer, UserPointerMut, UserString, +}; +use crate::kernel::vfs::dentry::Dentry; use crate::kernel::vfs::filearray::FD; use crate::kernel::vfs::types::{DeviceId, Mode}; use crate::kernel::vfs::{PollEvent, SeekOption}; -use crate::{ - io::{Buffer, BufferFill}, - kernel::{ - user::{CheckedUserPointer, UserBuffer, UserPointer, UserPointerMut, UserString}, - vfs::dentry::Dentry, - }, - path::Path, - prelude::*, -}; -use alloc::sync::Arc; -use core::time::Duration; -use posix_types::ctypes::{Long, PtrT}; -use posix_types::namei::RenameFlags; -use posix_types::open::{AtFlags, OpenFlags}; -use posix_types::poll::FDSet; -use posix_types::signal::{SigSet, Signal}; -use posix_types::stat::Stat; -use posix_types::stat::{StatX, TimeSpec}; -use posix_types::syscall_no::*; +use crate::path::Path; +use crate::prelude::*; impl FromSyscallArg for OpenFlags { fn from_arg(value: usize) -> Self { @@ -128,7 +126,7 @@ async fn openat(dirfd: FD, pathname: User, flags: OpenFlags, mode: Mode) -> #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_OPEN)] -async fn open(path: User, flags: OpenFlags, mode: u32) -> KResult { +async fn open(path: User, flags: OpenFlags, mode: Mode) -> KResult { sys_openat(thread, FD::AT_FDCWD, path, flags, mode).await } @@ -145,7 +143,10 @@ async fn dup(fd: FD) -> KResult { #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_DUP2)] async fn dup2(old_fd: FD, new_fd: FD) -> KResult { - thread.files.dup_to(old_fd, new_fd, OpenFlags::empty()) + thread + .files + .dup_to(old_fd, new_fd, OpenFlags::empty()) + .await } #[eonix_macros::define_syscall(SYS_DUP3)] @@ -172,7 +173,13 @@ async fn pipe(pipe_fd: UserMut<[FD; 2]>) -> KResult<()> { async fn getdents(fd: FD, buffer: UserMut, bufsize: usize) -> KResult { let mut buffer = UserBuffer::new(buffer, bufsize)?; - thread.files.get(fd).ok_or(EBADF)?.getdents(&mut buffer)?; + thread + .files + .get(fd) + .ok_or(EBADF)? 
+ .getdents(&mut buffer) + .await?; + Ok(buffer.wrote()) } @@ -264,7 +271,7 @@ async fn mkdirat(dirfd: FD, pathname: User, mode: Mode) -> KResult<()> { #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_MKDIR)] -async fn mkdir(pathname: User, mode: u32) -> KResult<()> { +async fn mkdir(pathname: User, mode: Mode) -> KResult<()> { sys_mkdirat(thread, FD::AT_FDCWD, pathname, mode).await } @@ -280,9 +287,9 @@ async fn truncate(pathname: User, length: usize) -> KResult<()> { let path = UserString::new(pathname)?; let path = Path::new(path.as_cstr().to_bytes())?; - let dentry = Dentry::open(&thread.fs_context, path, true)?; + let dentry = Dentry::open(&thread.fs_context, path, true).await?; - dentry.truncate(length) + dentry.truncate(length).await } #[eonix_macros::define_syscall(SYS_UNLINKAT)] @@ -296,7 +303,7 @@ async fn unlinkat(dirfd: FD, pathname: User) -> KResult<()> { #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_UNLINK)] async fn unlink(pathname: User) -> KResult<()> { - sys_unlinkat(thread, FD::AT_FDCWD, pathname) + sys_unlinkat(thread, FD::AT_FDCWD, pathname).await } #[eonix_macros::define_syscall(SYS_SYMLINKAT)] @@ -310,7 +317,7 @@ async fn symlinkat(target: User, dirfd: FD, linkpath: User) -> KResult<( #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_SYMLINK)] async fn symlink(target: User, linkpath: User) -> KResult<()> { - sys_symlinkat(thread, target, FD::AT_FDCWD, linkpath) + sys_symlinkat(thread, target, FD::AT_FDCWD, linkpath).await } #[derive(Clone, Copy, Debug)] @@ -347,7 +354,7 @@ async fn mknodat(dirfd: FD, pathname: User, mut mode: Mode, dev: UserDeviceI #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_MKNOD)] -async fn mknod(pathname: User, mode: u32, dev: u32) -> KResult<()> { +async fn mknod(pathname: User, mode: Mode, dev: UserDeviceId) -> KResult<()> { sys_mknodat(thread, FD::AT_FDCWD, pathname, mode, dev).await } @@ -389,7 +396,7 @@ async fn lseek(fd: FD, offset: u64, whence: u32) -> KResult { #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_LLSEEK)] -fn llseek( +async fn llseek( fd: FD, offset_high: u32, offset_low: u32, diff --git a/src/kernel/syscall/procops.rs b/src/kernel/syscall/procops.rs index b4d3e449..1359d0ab 100644 --- a/src/kernel/syscall/procops.rs +++ b/src/kernel/syscall/procops.rs @@ -1,38 +1,37 @@ +use alloc::borrow::ToOwned; +use alloc::ffi::CString; +use core::time::Duration; + +use bitflags::bitflags; +use eonix_hal::traits::trap::RawTrapContext; +use eonix_hal::trap::TrapContext; +use eonix_mm::address::Addr as _; +use eonix_sync::AsProof as _; +use posix_types::ctypes::PtrT; +use posix_types::signal::{SigAction, SigInfo, SigSet, Signal}; +use posix_types::stat::{TimeSpec, TimeVal}; +use posix_types::syscall_no::*; +use posix_types::SIGNAL_NOW; + use super::SyscallNoReturn; use crate::io::Buffer; use crate::kernel::constants::{ - CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_REALTIME_COARSE, EINVAL, ENOENT, ENOTDIR, ERANGE, ESRCH, -}; -use crate::kernel::constants::{ - ENOSYS, PR_GET_NAME, PR_SET_NAME, RLIMIT_STACK, SIG_BLOCK, SIG_SETMASK, SIG_UNBLOCK, + CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_REALTIME_COARSE, EINVAL, ENOENT, ENOSYS, ENOTDIR, + ERANGE, ESRCH, PR_GET_NAME, PR_SET_NAME, RLIMIT_STACK, SIG_BLOCK, SIG_SETMASK, SIG_UNBLOCK, }; use crate::kernel::mem::PageBuffer; use crate::kernel::syscall::{User, UserMut}; use crate::kernel::task::{ - do_clone, futex_wait, futex_wake, yield_now, FutexFlags, FutexOp, ProcessList, ProgramLoader, - RobustListHead, 
SignalAction, Thread, WaitId, WaitType, + do_clone, futex_wait, futex_wake, parse_futexop, yield_now, CloneArgs, FutexFlags, FutexOp, + ProcessList, ProgramLoader, RobustListHead, SignalAction, Thread, WaitId, WaitType, }; -use crate::kernel::task::{parse_futexop, CloneArgs}; use crate::kernel::timer::sleep; -use crate::kernel::user::UserString; -use crate::kernel::user::{UserPointer, UserPointerMut}; +use crate::kernel::user::{UserBuffer, UserPointer, UserPointerMut, UserString}; +use crate::kernel::vfs::dentry::Dentry; use crate::kernel::vfs::types::Permission; -use crate::kernel::vfs::{self, dentry::Dentry}; +use crate::kernel::vfs::{self}; use crate::path::Path; -use crate::{kernel::user::UserBuffer, prelude::*}; -use alloc::borrow::ToOwned; -use alloc::ffi::CString; -use bitflags::bitflags; -use core::time::Duration; -use eonix_hal::processor::UserTLS; -use eonix_hal::traits::trap::RawTrapContext; -use eonix_hal::trap::TrapContext; -use eonix_mm::address::Addr as _; -use eonix_sync::AsProof as _; -use posix_types::ctypes::PtrT; -use posix_types::signal::{SigAction, SigInfo, SigSet, Signal}; -use posix_types::stat::TimeVal; -use posix_types::{syscall_no::*, SIGNAL_NOW}; +use crate::prelude::*; #[repr(C)] #[derive(Debug, Clone, Copy)] @@ -366,7 +365,7 @@ async fn wait4( #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_WAITPID)] async fn waitpid(waitpid: i32, arg1: UserMut, options: u32) -> KResult { - sys_wait4(thread, waitpid, arg1, options, core::ptr::null_mut()).await + sys_wait4(thread, waitpid, arg1, options, UserMut::null()).await } #[eonix_macros::define_syscall(SYS_SETSID)] @@ -493,51 +492,15 @@ async fn gettid() -> KResult { Ok(thread.tid) } -pub fn parse_user_tls(arch_tls: usize) -> KResult { - #[cfg(target_arch = "x86_64")] - { - let desc = arch_tls as *mut posix_types::x86_64::UserDescriptor; - let desc_pointer = UserPointerMut::new(desc)?; - let mut desc = desc_pointer.read()?; - - // Clear the TLS area if it is not present. - if desc.flags.is_read_exec_only() && !desc.flags.is_present() { - if desc.limit != 0 && desc.base != 0 { - let len = if desc.flags.is_limit_in_pages() { - (desc.limit as usize) << 12 - } else { - desc.limit as usize - }; - - CheckedUserPointer::new(desc.base as _, len)?.zero()?; - } - } - - let (new_tls, entry) = - UserTLS::new32(desc.base, desc.limit, desc.flags.is_limit_in_pages()); - desc.entry = entry; - desc_pointer.write(desc)?; - - Ok(new_tls) - } - - #[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))] - { - Ok(UserTLS::new(arch_tls as u64)) - } -} - #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_SET_THREAD_AREA)] -async fn set_thread_area(arch_tls: usize) -> KResult<()> { - thread.set_user_tls(parse_user_tls(arch_tls)?)?; +async fn set_thread_area(tls: PtrT) -> KResult<()> { + use crate::kernel::task::UserTLSDescriptor; - // SAFETY: Preemption is disabled on calling `load_thread_area32()`. 
- unsafe { - eonix_preempt::disable(); - thread.load_thread_area32(); - eonix_preempt::enable(); - } + let tls = UserTLSDescriptor::new(tls)?.read()?; + + thread.set_user_tls(tls)?; + thread.activate_tls(); Ok(()) } @@ -651,18 +614,14 @@ async fn rt_sigprocmask( Ok(()) } -#[repr(C)] -#[derive(Clone, Copy)] -struct TimeSpec32 { - tv_sec: i32, - tv_nsec: i32, -} - -#[eonix_macros::define_syscall(SYS_RT_SIGTIMEDWAIT_TIME32)] -async fn rt_sigtimedwait_time32( +#[cfg_attr( + any(target_arch = "riscv64", target_arch = "loongarch64"), + eonix_macros::define_syscall(SYS_RT_SIGTIMEDWAIT) +)] +async fn rt_sigtimedwait( _uthese: User, _uinfo: UserMut, - _uts: User, + _uts: User, ) -> KResult { // TODO Ok(0) @@ -820,7 +779,7 @@ async fn clone( clone_flags: usize, new_sp: usize, parent_tidptr: UserMut, - tls: usize, + tls: PtrT, child_tidptr: UserMut, ) -> KResult { let clone_args = CloneArgs::for_clone(clone_flags, new_sp, child_tidptr, parent_tidptr, tls)?; @@ -925,8 +884,23 @@ async fn sigreturn() -> KResult { #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_ARCH_PRCTL)] -async fn arch_prctl(option: u32, addr: u32) -> KResult { - sys_arch_prctl(thread, option, addr).await +async fn arch_prctl(option: u32, addr: PtrT) -> KResult { + match option { + PR_SET_NAME => { + let name = UserPointer::<[u8; 16]>::new(User::with_addr(addr.addr()))?.read()?; + let len = name.iter().position(|&c| c == 0).unwrap_or(15); + thread.set_name(name[..len].into()); + Ok(0) + } + PR_GET_NAME => { + let name = thread.get_name(); + let len = name.len().min(15); + let name: [u8; 16] = core::array::from_fn(|i| if i < len { name[i] } else { 0 }); + UserPointerMut::<[u8; 16]>::new(UserMut::with_addr(addr.addr()))?.write(name)?; + Ok(0) + } + _ => Err(EINVAL), + } } pub fn keep_alive() {} diff --git a/src/kernel/task.rs b/src/kernel/task.rs index 2ef58069..3fe6fe97 100644 --- a/src/kernel/task.rs +++ b/src/kernel/task.rs @@ -8,6 +8,7 @@ mod process_list; mod session; mod signal; mod thread; +mod user_tls; pub use clone::{do_clone, CloneArgs, CloneFlags}; pub use futex::{futex_wait, futex_wake, parse_futexop, FutexFlags, FutexOp, RobustListHead}; @@ -19,6 +20,7 @@ pub use process_list::ProcessList; pub use session::Session; pub use signal::SignalAction; pub use thread::{yield_now, Thread, ThreadAlloc, ThreadBuilder}; +pub use user_tls::{UserTLS, UserTLSDescriptor}; fn do_block_on(mut future: core::pin::Pin<&mut F>) -> F::Output where @@ -79,30 +81,25 @@ pub async fn stackful(mut future: F) -> F::Output where F: core::future::Future, { - use crate::kernel::{ - interrupt::{default_fault_handler, default_irq_handler}, - timer::{should_reschedule, timer_interrupt}, - }; use alloc::sync::Arc; use alloc::task::Wake; use core::cell::UnsafeCell; use core::future::Future; use core::pin::Pin; use core::ptr::NonNull; - use core::sync::atomic::AtomicBool; - use core::sync::atomic::Ordering; - use core::task::Context; - use core::task::Poll; - use core::task::Waker; - use eonix_hal::traits::trap::RawTrapContext; - use eonix_hal::traits::trap::TrapReturn; - use eonix_hal::traits::trap::TrapType; + use core::sync::atomic::{AtomicBool, Ordering}; + use core::task::{Context, Poll, Waker}; + + use eonix_hal::traits::trap::{RawTrapContext, TrapReturn, TrapType}; use eonix_hal::trap::TrapContext; use eonix_preempt::assert_preempt_enabled; use eonix_runtime::executor::Stack; use eonix_runtime::task::Task; use thread::wait_for_wakeups; + use crate::kernel::interrupt::{default_fault_handler, default_irq_handler}; + use 
crate::kernel::timer::{should_reschedule, timer_interrupt}; + let stack = KernelStack::new(); fn execute(mut future: Pin<&mut F>, output_ptr: NonNull>) -> ! diff --git a/src/kernel/task/clone.rs b/src/kernel/task/clone.rs index e0d578c1..dd6f538d 100644 --- a/src/kernel/task/clone.rs +++ b/src/kernel/task/clone.rs @@ -1,18 +1,17 @@ -use crate::{ - kernel::{ - syscall::{procops::parse_user_tls, UserMut}, - task::{alloc_pid, ProcessBuilder, ProcessList, Thread, ThreadBuilder}, - user::UserPointerMut, - }, - KResult, -}; -use bitflags::bitflags; use core::num::NonZero; -use eonix_hal::processor::UserTLS; + +use bitflags::bitflags; use eonix_runtime::scheduler::RUNTIME; use eonix_sync::AsProof; +use posix_types::ctypes::PtrT; use posix_types::signal::Signal; +use super::{UserTLS, UserTLSDescriptor}; +use crate::kernel::syscall::UserMut; +use crate::kernel::task::{alloc_pid, ProcessBuilder, ProcessList, Thread, ThreadBuilder}; +use crate::kernel::user::UserPointerMut; +use crate::KResult; + bitflags! { #[derive(Debug, Default)] pub struct CloneFlags: usize { @@ -46,12 +45,18 @@ bitflags! { #[derive(Debug)] pub struct CloneArgs { pub flags: CloneFlags, - pub sp: Option>, // Stack pointer for the new thread. - pub exit_signal: Option, // Signal to send to the parent on exit. - pub set_tid_ptr: Option>, // Pointer to set child TID in user space. - pub clear_tid_ptr: Option>, // Pointer to clear child TID in user space. - pub parent_tid_ptr: Option>, // Pointer to parent TID in user space. - pub tls: Option, // Pointer to TLS information. + /// Stack pointer for the new thread. + pub sp: Option>, + /// Signal to send to the parent on exit. + pub exit_signal: Option, + /// Pointer to set child TID in user space. + pub set_tid_ptr: Option>, + /// Pointer to clear child TID in user space. + pub clear_tid_ptr: Option>, + /// Pointer to parent TID in user space. + pub parent_tid_ptr: Option>, + /// Pointer to TLS information. + pub tls: Option, } impl CloneArgs { @@ -62,7 +67,7 @@ impl CloneArgs { sp: usize, child_tid_ptr: UserMut, parent_tid_ptr: UserMut, - tls: usize, + tls: PtrT, ) -> KResult { let clone_flags = CloneFlags::from_bits_truncate(flags & !Self::MASK); let exit_signal = flags & Self::MASK; @@ -87,7 +92,8 @@ impl CloneArgs { .then_some(parent_tid_ptr); let tls = if clone_flags.contains(CloneFlags::CLONE_SETTLS) { - Some(parse_user_tls(tls)?) + let tls_desc = UserTLSDescriptor::new(tls)?; + Some(tls_desc.read()?) 
} else { None }; diff --git a/src/kernel/task/thread.rs b/src/kernel/task/thread.rs index 11348e51..77e8e618 100644 --- a/src/kernel/task/thread.rs +++ b/src/kernel/task/thread.rs @@ -1,43 +1,37 @@ -use super::{ - signal::{RaiseResult, SignalList}, - stackful, Process, ProcessList, WaitType, -}; -use crate::{ - kernel::{ - interrupt::default_irq_handler, - syscall::{syscall_handlers, SyscallHandler, User, UserMut}, - task::{clone::CloneArgs, futex::RobustListHead, CloneFlags}, - timer::{should_reschedule, timer_interrupt}, - user::{UserPointer, UserPointerMut}, - vfs::{filearray::FileArray, FsContext}, - }, - prelude::*, -}; -use alloc::{alloc::Allocator, sync::Arc}; +use alloc::alloc::Allocator; +use alloc::sync::Arc; +use core::future::{poll_fn, Future}; +use core::pin::Pin; +use core::ptr::NonNull; +use core::sync::atomic::{AtomicBool, Ordering}; +use core::task::{Context, Poll}; + use atomic_unique_refcell::AtomicUniqueRefCell; -use core::{ - future::{poll_fn, Future}, - pin::Pin, - ptr::NonNull, - sync::atomic::{AtomicBool, Ordering}, - task::{Context, Poll}, -}; -use eonix_hal::{ - fpu::FpuState, - processor::{UserTLS, CPU}, - traits::{ - fault::Fault, - fpu::RawFpuState as _, - trap::{RawTrapContext, TrapReturn, TrapType}, - }, - trap::TrapContext, -}; +use eonix_hal::fpu::FpuState; +use eonix_hal::traits::fault::Fault; +use eonix_hal::traits::fpu::RawFpuState as _; +use eonix_hal::traits::trap::{RawTrapContext, TrapReturn, TrapType}; +use eonix_hal::trap::TrapContext; use eonix_mm::address::{Addr as _, VAddr}; use eonix_sync::AsProofMut as _; use pointers::BorrowedArc; use posix_types::signal::Signal; use stalloc::UnsafeStalloc; +use super::signal::{RaiseResult, SignalList}; +use super::user_tls::UserTLS; +use super::{stackful, Process, ProcessList, WaitType}; +use crate::kernel::interrupt::default_irq_handler; +use crate::kernel::syscall::{syscall_handlers, SyscallHandler, User, UserMut}; +use crate::kernel::task::clone::CloneArgs; +use crate::kernel::task::futex::RobustListHead; +use crate::kernel::task::CloneFlags; +use crate::kernel::timer::{should_reschedule, timer_interrupt}; +use crate::kernel::user::{UserPointer, UserPointerMut}; +use crate::kernel::vfs::filearray::FileArray; +use crate::kernel::vfs::FsContext; +use crate::prelude::*; + #[eonix_percpu::define_percpu] static CURRENT_THREAD: Option> = None; @@ -275,12 +269,9 @@ impl Thread { self.signal_list.raise(signal) } - /// # Safety - /// This function is unsafe because it accesses the `current_cpu()`, which needs - /// to be called in a preemption disabled context. - pub unsafe fn load_thread_area32(&self) { + pub fn activate_tls(&self) { if let Some(tls) = self.inner.lock().tls.as_ref() { - CPU::local().as_mut().set_tls32(tls); + tls.activate(); } } @@ -442,14 +433,7 @@ impl Thread { CURRENT_THREAD.set(NonNull::new(&raw const *self as *mut _)); - unsafe { - eonix_preempt::disable(); - - // SAFETY: Preemption is disabled. - self.load_thread_area32(); - - eonix_preempt::enable(); - } + self.activate_tls(); let result = future.as_mut().poll(cx); diff --git a/src/kernel/task/user_tls/mod.rs b/src/kernel/task/user_tls/mod.rs new file mode 100644 index 00000000..2583b580 --- /dev/null +++ b/src/kernel/task/user_tls/mod.rs @@ -0,0 +1,34 @@ +cfg_if::cfg_if! 
{ + if #[cfg(target_arch = "x86_64")] { + mod x86_64; + pub use x86_64::*; + } else { + use eonix_mm::address::VAddr; + use posix_types::ctypes::PtrT; + + use crate::prelude::KResult; + + + #[derive(Debug, Clone)] + pub struct UserTLS(VAddr); + + #[derive(Debug, Clone)] + pub struct UserTLSDescriptor(VAddr); + + impl UserTLS { + pub fn activate(&self) { + self.0; + } + } + + impl UserTLSDescriptor { + pub fn new(tp: PtrT) -> KResult { + Ok(Self(VAddr::from(tp.addr()))) + } + + pub fn read(&self) -> KResult { + Ok(UserTLS(self.0)) + } + } + } +} diff --git a/src/kernel/task/user_tls/x86_64.rs b/src/kernel/task/user_tls/x86_64.rs new file mode 100644 index 00000000..5bb33b97 --- /dev/null +++ b/src/kernel/task/user_tls/x86_64.rs @@ -0,0 +1,83 @@ +use core::fmt; + +use eonix_hal::arch_exported::gdt::{GDTEntry, GDT}; +use eonix_hal::processor::CPU; +use eonix_mm::address::VAddr; +use posix_types::ctypes::PtrT; +use posix_types::x86_64::UserDescriptor; + +use crate::kernel::syscall::{User, UserMut}; +use crate::kernel::user::{CheckedUserPointer, UserPointerMut}; +use crate::prelude::KResult; + +#[derive(Debug, Clone)] +pub struct UserTLS { + desc: GDTEntry, + base: u64, +} + +pub struct UserTLSDescriptor<'a> { + ptr: UserPointerMut<'a, UserDescriptor>, +} + +impl UserTLS { + fn new(base: u32, limit: u32) -> Self { + Self { + desc: GDTEntry::new_tls(base, limit), + base: base as u64, + } + } + + fn new_page_limit(base: u32, limit_in_pages: u32) -> Self { + Self { + desc: GDTEntry::new_tls_page_limit(base, limit_in_pages), + base: base as u64, + } + } + + pub fn activate(&self) { + CPU::local().as_mut().set_tls32(self.desc, self.base); + } +} + +impl UserTLSDescriptor<'_> { + pub fn new(raw_tls: PtrT) -> KResult { + Ok(Self { + ptr: UserPointerMut::new(UserMut::::with_addr(raw_tls.addr()))?, + }) + } + + pub fn read(&self) -> KResult { + let mut desc = self.ptr.read()?; + + let base = VAddr::from(desc.base as usize); + + // Clear the TLS area if it is not present. 
+ if desc.flags.is_read_exec_only() && !desc.flags.is_present() { + if desc.limit != 0 && base != VAddr::NULL { + let len = if desc.flags.is_limit_in_pages() { + (desc.limit as usize) << 12 + } else { + desc.limit as usize + }; + + CheckedUserPointer::new(User::new(base), len)?.zero()?; + } + } + + desc.entry = GDT::TLS32_INDEX as u32; + self.ptr.write(desc)?; + + Ok(if desc.flags.is_limit_in_pages() { + UserTLS::new_page_limit(desc.base, desc.limit) + } else { + UserTLS::new(desc.base, desc.limit) + }) + } +} + +impl fmt::Debug for UserTLSDescriptor<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_tuple("UserTLSDescriptor").finish_non_exhaustive() + } +} diff --git a/src/kernel/user/dataflow.rs b/src/kernel/user/dataflow.rs index 02e7d791..5d8ac167 100644 --- a/src/kernel/user/dataflow.rs +++ b/src/kernel/user/dataflow.rs @@ -1,18 +1,15 @@ -use crate::{ - io::{Buffer, FillResult}, - prelude::*, -}; -use crate::{ - io::{IntoStream, Stream}, - kernel::{ - constants::{EFAULT, EINVAL}, - syscall::{User, UserMut}, - }, -}; -use core::{arch::asm, ffi::CStr, marker::PhantomData}; +use core::arch::asm; +use core::ffi::CStr; +use core::marker::PhantomData; + use eonix_mm::address::Addr; use eonix_preempt::assert_preempt_enabled; +use crate::io::{Buffer, FillResult, IntoStream, Stream}; +use crate::kernel::constants::{EFAULT, EINVAL}; +use crate::kernel::syscall::{User, UserMut}; +use crate::prelude::*; + pub struct CheckedUserPointer<'a> { ptr: User, len: usize, From 841bb379b085494e3975e8f132f8caca0447ff0d Mon Sep 17 00:00:00 2001 From: greatbridf Date: Tue, 6 Jan 2026 22:02:22 +0800 Subject: [PATCH 07/25] mem, slab: rework the slab system Signed-off-by: greatbridf --- .vscode/settings.json | 1 + Cargo.lock | 1 - crates/intrusive_list/src/lib.rs | 2 + crates/slab_allocator/Cargo.toml | 2 - crates/slab_allocator/src/lib.rs | 291 +++++++++++++++++++++--- crates/slab_allocator/src/slab_cache.rs | 164 ------------- src/kernel/mem/allocator.rs | 21 +- src/kernel/mem/page_alloc.rs | 10 +- src/kernel/mem/page_alloc/raw_page.rs | 199 ++++++++++------ 9 files changed, 401 insertions(+), 290 deletions(-) delete mode 100644 crates/slab_allocator/src/slab_cache.rs diff --git a/.vscode/settings.json b/.vscode/settings.json index 634d16af..10b4a8b4 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,3 +1,4 @@ { "makefile.configureOnOpen": false, + "editor.formatOnSave": true, } diff --git a/Cargo.lock b/Cargo.lock index 5487f284..3e8a36bd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -518,7 +518,6 @@ version = "0.1.0" dependencies = [ "eonix_mm", "eonix_sync", - "intrusive_list", ] [[package]] diff --git a/crates/intrusive_list/src/lib.rs b/crates/intrusive_list/src/lib.rs index af8c4f1a..440944d0 100644 --- a/crates/intrusive_list/src/lib.rs +++ b/crates/intrusive_list/src/lib.rs @@ -25,6 +25,8 @@ impl List { } pub fn insert(&mut self, node: &mut Link) { + // TODO: `node` above should be of 'static. 
+ self.head.insert(node); self.count += 1; } diff --git a/crates/slab_allocator/Cargo.toml b/crates/slab_allocator/Cargo.toml index 067b6f53..926ac688 100644 --- a/crates/slab_allocator/Cargo.toml +++ b/crates/slab_allocator/Cargo.toml @@ -6,5 +6,3 @@ edition = "2024" [dependencies] eonix_mm = { path = "../eonix_mm" } eonix_sync = { path = "../eonix_sync" } -intrusive_list = { path = "../intrusive_list" } - diff --git a/crates/slab_allocator/src/lib.rs b/crates/slab_allocator/src/lib.rs index ce163183..8a684edd 100644 --- a/crates/slab_allocator/src/lib.rs +++ b/crates/slab_allocator/src/lib.rs @@ -1,69 +1,288 @@ #![no_std] -mod slab_cache; +use core::ptr::NonNull; -use core::{cmp::max, ptr::NonNull}; - -use eonix_mm::paging::{PageAlloc, RawPage}; use eonix_sync::Spin; -use intrusive_list::Link; -use slab_cache::SlabCache; -pub trait SlabRawPage: RawPage { - /// Get the container raw page struct of the list link. +#[repr(C)] +pub union SlabSlot { + slab_slot: Option>, + data: u8, +} + +pub trait SlabPageList: Sized { + type Page: SlabPage; + + fn new() -> Self; + fn is_empty(&self) -> bool; + + fn peek_head(&mut self) -> Option<&mut Self::Page>; + + fn pop_head(&mut self) -> Option<&'static mut Self::Page>; + fn push_tail(&mut self, page: &'static mut Self::Page); + fn remove(&mut self, page: &mut Self::Page); +} + +pub trait SlabPage: Sized + 'static { + fn get_data_ptr(&self) -> NonNull<[u8]>; + + fn get_free_slot(&self) -> Option>; + fn set_free_slot(&mut self, next: Option>); + + fn get_alloc_count(&self) -> usize; + + /// Increase the allocation count by 1 and return the increased value. + fn inc_alloc_count(&mut self) -> usize; + + /// Decrease the allocation count by 1 and return the decreased value. + fn dec_alloc_count(&mut self) -> usize; + + /// Get the [`SlabPage`] that `ptr` is allocated from. /// /// # Safety - /// The caller MUST ensure that the link points to a `RawPage`. - unsafe fn from_link(link: &mut Link) -> Self; + /// The caller MUST ensure that no others could be calling this function and + /// getting the [`SlabPage`] at the same time. + unsafe fn from_allocated(ptr: NonNull) -> &'static mut Self; +} + +pub(crate) trait SlabPageExt { + fn alloc_slot(&mut self) -> Option>; - /// Get the list link of the raw page. - /// /// # Safety - /// The caller MUST ensure that at any time, only one mutable reference - /// to the link exists. - unsafe fn get_link(&self) -> &mut Link; + /// The caller MUST ensure that `slot_data_ptr` points to some position + /// previously allocated by [`SlabPageExt::alloc_slot`]. 
+ unsafe fn free_slot(&mut self, slot_data_ptr: NonNull); + + fn is_empty(&self) -> bool; + fn is_full(&self) -> bool; +} + +impl SlabPageExt for T +where + T: SlabPage, +{ + fn alloc_slot(&mut self) -> Option> { + let mut free_slot = self.get_free_slot()?; + + unsafe { + let free_slot = free_slot.as_mut(); + + let next_slot = free_slot.slab_slot; + // ===== `free_slot` is now safe to be overwritten - fn slab_init(&self, first_free: Option>); + self.set_free_slot(next_slot); + self.inc_alloc_count(); - // which slab page the ptr belong - fn in_which(ptr: *mut u8) -> Self; + Some(NonNull::new_unchecked(&mut free_slot.data)) + } + } + + unsafe fn free_slot(&mut self, slot_data_ptr: NonNull) { + unsafe { + let mut free_slot: NonNull = slot_data_ptr.cast(); + free_slot.as_mut().slab_slot = self.get_free_slot(); - fn real_page_ptr(&self) -> *mut u8; + self.set_free_slot(Some(free_slot)); + self.dec_alloc_count(); + } + } - fn allocated_count(&self) -> &mut u32; + fn is_empty(&self) -> bool { + self.get_alloc_count() == 0 + } - fn next_free(&self) -> &mut Option>; + fn is_full(&self) -> bool { + self.get_free_slot().is_none() + } } -pub struct SlabAllocator { - slabs: [Spin>; SLAB_CACHE_COUNT], - alloc: A, +pub trait SlabPageAlloc { + type Page: SlabPage; + type PageList: SlabPageList; + + /// Allocate a page suitable for slab system use. The page MUST come with + /// its allocation count 0 and next free slot None. + /// + /// # Safety + /// The page returned MUST be properly initialized before its usage. + unsafe fn alloc_uninit(&self) -> &'static mut Self::Page; } -unsafe impl Send for SlabAllocator {} -unsafe impl Sync for SlabAllocator {} +pub(crate) struct SlabList +where + T: SlabPageList, +{ + empty_list: T, + partial_list: T, + full_list: T, + object_size: usize, +} + +pub struct SlabAlloc +where + P: SlabPageAlloc, +{ + slabs: [Spin>; COUNT], + alloc: P, +} + +unsafe impl Send for SlabAlloc where P: SlabPageAlloc {} +unsafe impl Sync for SlabAlloc where P: SlabPageAlloc {} -impl SlabAllocator +impl SlabAlloc where - Raw: SlabRawPage, - Allocator: PageAlloc, + L: SlabPageAlloc, { - pub fn new_in(alloc: Allocator) -> Self { + pub fn new_in(alloc: L) -> Self { Self { - slabs: core::array::from_fn(|i| Spin::new(SlabCache::new_in(1 << (i + 3)))), + slabs: core::array::from_fn(|i| Spin::new(SlabList::new(1 << (i + 3)))), alloc, } } - pub fn alloc(&self, mut size: usize) -> *mut u8 { - size = max(8, size); + pub fn alloc(&self, mut size: usize) -> NonNull { + size = size.max(8); let idx = size.next_power_of_two().trailing_zeros() - 3; self.slabs[idx as usize].lock().alloc(&self.alloc) } - pub fn dealloc(&self, ptr: *mut u8, mut size: usize) { - size = max(8, size); + pub unsafe fn dealloc(&self, ptr: NonNull, mut size: usize) { + size = size.max(8); let idx = size.next_power_of_two().trailing_zeros() - 3; - self.slabs[idx as usize].lock().dealloc(ptr, &self.alloc); + + unsafe { + // SAFETY: + self.slabs[idx as usize].lock().dealloc(ptr, &self.alloc); + } } } + +impl SlabList +where + T: SlabPageList, +{ + fn new(object_size: usize) -> Self { + Self { + empty_list: T::new(), + partial_list: T::new(), + full_list: T::new(), + object_size, + } + } + + fn alloc_from_partial(&mut self) -> NonNull { + let head = self.partial_list.peek_head().unwrap(); + let slot = head.alloc_slot().unwrap(); + + if head.is_full() { + let head = self.partial_list.pop_head().unwrap(); + self.full_list.push_tail(head); + } + + slot + } + + fn alloc_from_empty(&mut self) -> NonNull { + let head = 
self.empty_list.pop_head().unwrap(); + let slot = head.alloc_slot().unwrap(); + + if head.is_full() { + self.full_list.push_tail(head); + } else { + self.partial_list.push_tail(head); + } + + slot + } + + fn charge(&mut self, alloc: &impl SlabPageAlloc) { + unsafe { + let slab = alloc.alloc_uninit(); + let free_slot = make_slab_page(slab.get_data_ptr(), self.object_size); + + slab.set_free_slot(Some(free_slot)); + + self.empty_list.push_tail(slab); + } + } + + fn alloc(&mut self, alloc: &impl SlabPageAlloc) -> NonNull { + if !self.partial_list.is_empty() { + return self.alloc_from_partial(); + } + + if self.empty_list.is_empty() { + self.charge(alloc); + } + + self.alloc_from_empty() + } + + unsafe fn dealloc(&mut self, ptr: NonNull, _alloc: &impl SlabPageAlloc) { + let slab_page = unsafe { + // SAFETY: + ::from_allocated(ptr) + }; + + let (was_full, is_empty); + + was_full = slab_page.is_full(); + + unsafe { + // SAFETY: + slab_page.free_slot(ptr); + } + + is_empty = slab_page.is_empty(); + + match (was_full, is_empty) { + (false, false) => {} + (false, true) => { + self.partial_list.remove(slab_page); + self.empty_list.push_tail(slab_page); + } + (true, false) => { + self.full_list.remove(slab_page); + self.partial_list.push_tail(slab_page); + } + (true, true) => { + self.full_list.remove(slab_page); + self.empty_list.push_tail(slab_page); + } + } + + // TODO: Check whether we should place some pages back with `alloc` if + // the global free page count is below the watermark. + } +} + +pub fn make_slab_page(page_ptr: NonNull<[u8]>, slot_size: usize) -> NonNull { + assert!( + slot_size >= core::mem::size_of::(), + "The minimum slot size is of a pointer's width" + ); + + let page_size = page_ptr.len(); + let slot_count = page_size / slot_size; + let page_start: NonNull = page_ptr.cast(); + + // Quick checks + assert!( + page_size % slot_size == 0, + "The page's size should be a multiple of the slot size" + ); + + let mut prev_free_slot = None; + for i in (0..slot_count).rev() { + let offset = i * slot_size; + + unsafe { + let mut slot_ptr: NonNull = page_start.add(offset).cast(); + + slot_ptr.as_mut().slab_slot = prev_free_slot; + prev_free_slot = Some(slot_ptr); + } + } + + prev_free_slot.expect("There should be at least one slot.") +} diff --git a/crates/slab_allocator/src/slab_cache.rs b/crates/slab_allocator/src/slab_cache.rs deleted file mode 100644 index 98e27fc8..00000000 --- a/crates/slab_allocator/src/slab_cache.rs +++ /dev/null @@ -1,164 +0,0 @@ -use super::SlabRawPage; -use core::{marker::PhantomData, ptr::NonNull}; -use eonix_mm::paging::{PageAlloc, PAGE_SIZE}; -use intrusive_list::List; - -pub(crate) struct SlabCache { - empty_list: List, - partial_list: List, - full_list: List, - object_size: u32, - _phantom: PhantomData<(T, A)>, -} - -trait SlabRawPageExt { - fn alloc_slot(&self) -> Option>; - fn dealloc_slot(&self, slot_ptr: *mut u8); - fn is_full(&self) -> bool; - fn is_empty(&self) -> bool; - fn slab_page_init(&self, object_size: u32) -> Option>; -} - -impl SlabRawPageExt for T -where - T: SlabRawPage, -{ - fn alloc_slot(&self) -> Option> { - let ptr = self.next_free().clone(); - - let next_free = match ptr { - Some(ptr) => unsafe { ptr.read() as *mut usize }, - None => unreachable!(), - }; - *self.allocated_count() += 1; - *self.next_free() = NonNull::new(next_free); - return ptr; - } - - fn dealloc_slot(&self, slot_ptr: *mut u8) { - let slot_ptr = slot_ptr as *mut usize; - - if let Some(last_free) = self.next_free().clone() { - unsafe { *slot_ptr = last_free.as_ptr() as 
usize } - } else { - unsafe { *slot_ptr = 0 } - } - - *self.allocated_count() -= 1; - *self.next_free() = NonNull::new(slot_ptr); - } - - fn slab_page_init(&self, object_size: u32) -> Option> { - assert!(object_size >= core::mem::size_of::() as u32); - - let first_free = self.real_page_ptr() as *mut usize; - - let mut slot_ptr = first_free; - let mut slot_count = PAGE_SIZE / object_size as usize; - - // SAFETY: carefully ptr operate - unsafe { - loop { - if slot_count == 1 { - *slot_ptr = 0; - break; - } - - let next_ptr = slot_ptr.byte_add(object_size as usize); - *slot_ptr = next_ptr as usize; - slot_ptr = next_ptr; - slot_count -= 1; - } - } - - NonNull::new(first_free) - } - - fn is_empty(&self) -> bool { - self.allocated_count().clone() == 0 - } - - fn is_full(&self) -> bool { - self.next_free().is_none() - } -} - -impl SlabCache -where - Raw: SlabRawPage, - Allocator: PageAlloc, -{ - pub(crate) const fn new_in(object_size: u32) -> Self { - // avoid unnecessary branch in alloc and dealloc - assert!(object_size <= PAGE_SIZE as u32 / 2); - - Self { - empty_list: List::new(), - partial_list: List::new(), - full_list: List::new(), - object_size: object_size, - _phantom: PhantomData, - } - } - - pub(crate) fn alloc(&mut self, alloc: &Allocator) -> *mut u8 { - if !self.partial_list.is_empty() { - let page_ptr = unsafe { - Raw::from_link( - self.partial_list - .head() - .expect("partial pages should not be empty"), - ) - }; - - let ptr = page_ptr.alloc_slot().expect("should get slot"); - - if page_ptr.is_full() { - self.partial_list.remove(unsafe { page_ptr.get_link() }); - self.full_list.insert(unsafe { page_ptr.get_link() }); - } - return ptr.as_ptr() as *mut u8; - } - - if !self.empty_list.is_empty() { - let page_ptr = unsafe { - Raw::from_link( - self.empty_list - .head() - .expect("empty pages should not be empty"), - ) - }; - - let ptr = page_ptr.alloc_slot().expect("should get slot"); - self.empty_list.remove(unsafe { page_ptr.get_link() }); - self.partial_list.insert(unsafe { page_ptr.get_link() }); - return ptr.as_ptr() as *mut u8; - } - - let new_page_ptr = alloc.alloc().expect("slab_cache get page fail!"); - let first_free = new_page_ptr.slab_page_init(self.object_size); - new_page_ptr.slab_init(first_free); - let ptr = new_page_ptr.alloc_slot().expect("should get slot"); - self.partial_list.insert(unsafe { new_page_ptr.get_link() }); - ptr.as_ptr() as *mut u8 - } - - pub(crate) fn dealloc(&mut self, ptr: *mut u8, _alloc: &Allocator) { - let page_ptr = Raw::in_which(ptr); - - if page_ptr.is_full() { - self.full_list.remove(unsafe { page_ptr.get_link() }); - self.partial_list.insert(unsafe { page_ptr.get_link() }); - } - - page_ptr.dealloc_slot(ptr); - - if page_ptr.is_empty() { - self.partial_list.remove(unsafe { page_ptr.get_link() }); - self.empty_list.insert(unsafe { page_ptr.get_link() }); - } - - // TODO: Check whether we should place some pages back with `alloc` if the global - // free page count is below the watermark. 
- } -} diff --git a/src/kernel/mem/allocator.rs b/src/kernel/mem/allocator.rs index 9e5df69b..a3676ce0 100644 --- a/src/kernel/mem/allocator.rs +++ b/src/kernel/mem/allocator.rs @@ -5,13 +5,12 @@ use eonix_hal::mm::ArchPhysAccess; use eonix_mm::address::PhysAccess; use eonix_mm::paging::{PAGE_SIZE_BITS, PFN}; use eonix_sync::LazyLock; -use slab_allocator::SlabAllocator; +use slab_allocator::SlabAlloc; -use super::page_alloc::RawPagePtr; use super::{GlobalPageAlloc, Page, PageExt}; -static SLAB_ALLOCATOR: LazyLock> = - LazyLock::new(|| SlabAllocator::new_in(GlobalPageAlloc)); +static SLAB_ALLOCATOR: LazyLock> = + LazyLock::new(|| SlabAlloc::new_in(GlobalPageAlloc)); struct Allocator; @@ -28,23 +27,23 @@ unsafe impl GlobalAlloc for Allocator { let ptr = page.get_ptr(); page.into_raw(); - ptr.as_ptr() + ptr }; - if result.is_null() { - core::ptr::null_mut() - } else { - result as *mut u8 - } + result.as_ptr() } unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { let size = layout.size().next_power_of_two(); + let ptr = unsafe { + // SAFETY: The memory we've allocated MUST be non-null. + NonNull::new_unchecked(ptr) + }; if size <= 2048 { SLAB_ALLOCATOR.dealloc(ptr, size) } else { - let paddr = ArchPhysAccess::from_ptr(NonNull::new_unchecked(ptr)); + let paddr = ArchPhysAccess::from_ptr(ptr); let pfn = PFN::from(paddr); Page::from_raw(pfn); }; diff --git a/src/kernel/mem/page_alloc.rs b/src/kernel/mem/page_alloc.rs index fcbe9bb3..1c018f37 100644 --- a/src/kernel/mem/page_alloc.rs +++ b/src/kernel/mem/page_alloc.rs @@ -1,15 +1,13 @@ mod raw_page; -use buddy_allocator::{BuddyAllocator, BuddyRawPage as _}; use core::sync::atomic::Ordering; -use eonix_mm::{ - address::{AddrOps as _, PRange}, - paging::{GlobalPageAlloc as GlobalPageAllocTrait, PageAlloc, PFN}, -}; + +use buddy_allocator::{BuddyAllocator, BuddyRawPage as _}; +use eonix_mm::address::{AddrOps as _, PRange}; +use eonix_mm::paging::{GlobalPageAlloc as GlobalPageAllocTrait, PageAlloc, PFN}; use eonix_sync::{NoContext, Spin}; use intrusive_list::List; use raw_page::PageFlags; - pub use raw_page::{RawPage, RawPagePtr}; const COSTLY_ORDER: u32 = 3; diff --git a/src/kernel/mem/page_alloc/raw_page.rs b/src/kernel/mem/page_alloc/raw_page.rs index 08536693..d793ccd7 100644 --- a/src/kernel/mem/page_alloc/raw_page.rs +++ b/src/kernel/mem/page_alloc/raw_page.rs @@ -1,65 +1,46 @@ -use crate::kernel::mem::page_cache::PageCacheRawPage; -use crate::kernel::mem::PhysAccess; +use core::ptr::NonNull; +use core::sync::atomic::{AtomicU32, AtomicUsize, Ordering}; + use buddy_allocator::BuddyRawPage; -use core::{ - ptr::NonNull, - sync::atomic::{AtomicU32, AtomicUsize, Ordering}, -}; use eonix_hal::mm::ArchPhysAccess; -use eonix_mm::{ - address::{PAddr, PhysAccess as _}, - paging::{RawPage as RawPageTrait, PFN}, -}; -use intrusive_list::{container_of, Link}; -use slab_allocator::SlabRawPage; +use eonix_mm::address::{PAddr, PhysAccess as _}; +use eonix_mm::paging::{PageAlloc, RawPage as RawPageTrait, PFN}; +use intrusive_list::{container_of, Link, List}; +use slab_allocator::{SlabPage, SlabPageAlloc, SlabPageList, SlabSlot}; + +use super::GlobalPageAlloc; +use crate::kernel::mem::page_cache::PageCacheRawPage; +use crate::kernel::mem::PhysAccess; const PAGE_ARRAY: NonNull = unsafe { NonNull::new_unchecked(0xffffff8040000000 as *mut _) }; pub struct PageFlags(AtomicU32); -struct SlabPageInner { - allocated_count: u32, - free_next: Option>, +#[derive(Clone, Copy)] +struct SlabPageData { + allocated_count: usize, + free_next: Option>, } -impl SlabPageInner 
{ - fn new(free_next: Option>) -> Self { +impl SlabPageData { + const fn new() -> Self { Self { allocated_count: 0, - free_next, + free_next: None, } } } -struct PageCacheInner { +#[derive(Clone, Copy)] +struct PageCacheData { valid_size: usize, } -pub struct BuddyPageInner {} - -enum PageType { - Buddy(BuddyPageInner), - Slab(SlabPageInner), - PageCache(PageCacheInner), -} - -impl PageType { - fn slab_data(&mut self) -> &mut SlabPageInner { - if let PageType::Slab(slab_data) = self { - return slab_data; - } else { - unreachable!() - } - } - - fn page_cache_data(&mut self) -> &mut PageCacheInner { - if let PageType::PageCache(cache_data) = self { - return cache_data; - } else { - unreachable!() - } - } +#[repr(C)] +union PageData { + slab: SlabPageData, + page_cache: PageCacheData, } pub struct RawPage { @@ -73,7 +54,7 @@ pub struct RawPage { flags: PageFlags, refcount: AtomicUsize, - shared_data: PageType, + shared_data: PageData, } #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] @@ -109,6 +90,13 @@ impl PageFlags { } impl RawPagePtr { + pub const fn from_ref(raw_page_ref: &RawPage) -> Self { + Self::new(unsafe { + // SAFETY: Rust references always points to non-null addresses. + NonNull::new_unchecked(&raw const *raw_page_ref as *mut _) + }) + } + pub const fn new(ptr: NonNull) -> Self { Self(ptr) } @@ -215,50 +203,68 @@ impl BuddyRawPage for RawPagePtr { } } -impl SlabRawPage for RawPagePtr { - unsafe fn from_link(link: &mut Link) -> Self { - let raw_page_ptr = container_of!(link, RawPage, link); - Self(raw_page_ptr) - } +impl SlabPage for RawPage { + fn get_data_ptr(&self) -> NonNull<[u8]> { + let raw_page_ptr = RawPagePtr::from_ref(self); + let paddr_start = PAddr::from(PFN::from(raw_page_ptr)); + let page_data_ptr = unsafe { paddr_start.as_ptr() }; - unsafe fn get_link(&self) -> &mut Link { - &mut self.as_mut().link + NonNull::slice_from_raw_parts(page_data_ptr, 1 << (self.order + 12)) } - fn in_which(ptr: *mut u8) -> RawPagePtr { + fn get_free_slot(&self) -> Option> { unsafe { - // SAFETY: The pointer is allocated from the slab allocator, - // which can't be null. - let ptr = NonNull::new_unchecked(ptr); + // SAFETY: TODO + self.shared_data.slab.free_next + } + } - // SAFETY: The pointer is valid. - let paddr = ArchPhysAccess::from_ptr(ptr); - let pfn = PFN::from(paddr); + fn set_free_slot(&mut self, next: Option>) { + self.shared_data.slab.free_next = next; + } - RawPagePtr::from(pfn) + fn get_alloc_count(&self) -> usize { + unsafe { + // SAFETY: TODO + self.shared_data.slab.allocated_count } } - fn allocated_count(&self) -> &mut u32 { - &mut self.as_mut().shared_data.slab_data().allocated_count - } + fn inc_alloc_count(&mut self) -> usize { + unsafe { + // SAFETY: TODO + self.shared_data.slab.allocated_count += 1; - fn next_free(&self) -> &mut Option> { - &mut self.as_mut().shared_data.slab_data().free_next + self.shared_data.slab.allocated_count + } } - fn real_page_ptr(&self) -> *mut u8 { - self.real_ptr().as_ptr() + fn dec_alloc_count(&mut self) -> usize { + unsafe { + // SAFETY: TODO + self.shared_data.slab.allocated_count -= 1; + + self.shared_data.slab.allocated_count + } } - fn slab_init(&self, first_free: Option>) { - self.as_mut().shared_data = PageType::Slab(SlabPageInner::new(first_free)); + unsafe fn from_allocated(ptr: NonNull) -> &'static mut Self { + unsafe { + // SAFETY: The caller ensures that `ptr` is valid. 
+            let paddr = ArchPhysAccess::from_ptr(ptr);
+            let pfn = PFN::from(paddr);
+
+            RawPagePtr::from(pfn).as_mut()
+        }
     }
 }
 
 impl PageCacheRawPage for RawPagePtr {
     fn valid_size(&self) -> &mut usize {
-        &mut self.as_mut().shared_data.page_cache_data().valid_size
+        unsafe {
+            // SAFETY: The caller ensures that the page is in some page cache.
+            &mut self.as_mut().shared_data.page_cache.valid_size
+        }
     }
 
     fn is_dirty(&self) -> bool {
@@ -274,6 +280,59 @@ impl PageCacheRawPage for RawPagePtr {
     }
 
     fn cache_init(&self) {
-        self.as_mut().shared_data = PageType::PageCache(PageCacheInner { valid_size: 0 });
+        self.as_mut().shared_data.page_cache = PageCacheData { valid_size: 0 };
     }
 }
+
+pub struct RawSlabPageList(List);
+
+impl SlabPageList for RawSlabPageList {
+    type Page = RawPage;
+
+    fn new() -> Self {
+        Self(List::new())
+    }
+
+    fn is_empty(&self) -> bool {
+        self.0.is_empty()
+    }
+
+    fn peek_head(&mut self) -> Option<&mut Self::Page> {
+        unsafe {
+            let link = self.0.head()?;
+            let mut raw_page_ptr = container_of!(link, RawPage, link);
+
+            Some(raw_page_ptr.as_mut())
+        }
+    }
+
+    fn pop_head(&mut self) -> Option<&'static mut Self::Page> {
+        unsafe {
+            let link = self.0.pop()?;
+            let mut raw_page_ptr = container_of!(link, RawPage, link);
+
+            Some(raw_page_ptr.as_mut())
+        }
+    }
+
+    fn push_tail(&mut self, page: &'static mut Self::Page) {
+        self.0.insert(&mut page.link);
+    }
+
+    fn remove(&mut self, page: &mut Self::Page) {
+        self.0.remove(&mut page.link)
+    }
+}
+
+impl SlabPageAlloc for GlobalPageAlloc {
+    type Page = RawPage;
+    type PageList = RawSlabPageList;
+
+    unsafe fn alloc_uninit(&self) -> &'static mut RawPage {
+        let raw_page = self.alloc().expect("Out of memory").as_mut();
+        raw_page.flags.set(PageFlags::SLAB);
+        raw_page.shared_data.slab = SlabPageData::new();
+
+        raw_page
+    }
+}

From f17236f46e26964779a64ec90098e2aa9de9fd38 Mon Sep 17 00:00:00 2001
From: greatbridf
Date: Wed, 7 Jan 2026 02:16:47 +0800
Subject: [PATCH 08/25] mem, buddy: rework the buddy system

Introduce `Zone`s: a Zone is a region of physical memory whose pages all
share the same NUMA node. The Zone holds all the RawPage structs. The
buddy allocator now stores a reference to the zone that holds all of its
pages, making the buddy allocator independent of the underlying physical
page frame management framework.

Remove unnecessary page flags and structs.
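For reference, the contract the allocator now builds on is small. Below is
a condensed sketch of the `Zone` trait and its wiring as they appear later
in this patch; the generic parameters on `BuddyAllocator` are spelled out
here for illustration only and are inferred from its `Z: Zone, L: PageList`
bounds:

    // A Zone answers range queries and hands out the per-PFN page struct.
    pub trait Zone: Send + Sync {
        type Page;

        // Whether `range` lies entirely within this zone.
        fn contains_prange(&self, range: PRange) -> bool;

        // The page struct for `pfn`, or None if `pfn` is outside the zone.
        fn get_page(&self, pfn: PFN) -> Option<&UnsafeCell<Self::Page>>;
    }

    // The kernel side then instantiates the allocator against a static zone:
    static BUDDY_ALLOC: Spin<BuddyAllocator<GlobalZone, RawPageList>> =
        Spin::new(BuddyAllocator::new(&GlobalZone()));

Any backend that can map a PFN to its page struct can sit behind the buddy
allocator; the allocator itself no longer cares how page frames are stored.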
Signed-off-by: greatbridf --- Cargo.lock | 1 - crates/buddy_allocator/Cargo.toml | 1 - crates/buddy_allocator/src/free_area.rs | 59 ------ crates/buddy_allocator/src/lib.rs | 265 +++++++++++++++++++----- crates/buddy_allocator/src/zone.rs | 146 ------------- crates/eonix_mm/src/paging.rs | 4 + crates/eonix_mm/src/paging/list.rs | 19 ++ crates/eonix_mm/src/paging/raw_page.rs | 9 +- crates/eonix_mm/src/paging/zone.rs | 20 ++ crates/slab_allocator/src/lib.rs | 35 ++-- src/kernel/mem/page_alloc.rs | 116 +++++------ src/kernel/mem/page_alloc/raw_page.rs | 87 ++++---- src/kernel/mem/page_alloc/zones.rs | 25 +++ src/kernel_init.rs | 24 +-- 14 files changed, 407 insertions(+), 404 deletions(-) delete mode 100644 crates/buddy_allocator/src/free_area.rs delete mode 100644 crates/buddy_allocator/src/zone.rs create mode 100644 crates/eonix_mm/src/paging/list.rs create mode 100644 crates/eonix_mm/src/paging/zone.rs create mode 100644 src/kernel/mem/page_alloc/zones.rs diff --git a/Cargo.lock b/Cargo.lock index 3e8a36bd..896ec493 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -70,7 +70,6 @@ name = "buddy_allocator" version = "0.1.0" dependencies = [ "eonix_mm", - "intrusive_list", ] [[package]] diff --git a/crates/buddy_allocator/Cargo.toml b/crates/buddy_allocator/Cargo.toml index 51f02295..bdb0a28c 100644 --- a/crates/buddy_allocator/Cargo.toml +++ b/crates/buddy_allocator/Cargo.toml @@ -5,4 +5,3 @@ edition = "2024" [dependencies] eonix_mm = { path = "../eonix_mm" } -intrusive_list = { path = "../intrusive_list" } diff --git a/crates/buddy_allocator/src/free_area.rs b/crates/buddy_allocator/src/free_area.rs deleted file mode 100644 index 837f733f..00000000 --- a/crates/buddy_allocator/src/free_area.rs +++ /dev/null @@ -1,59 +0,0 @@ -use crate::BuddyRawPage; -use core::marker::{PhantomData, Send, Sync}; -use intrusive_list::Link; - -pub struct FreeArea { - free_list: Link, - count: usize, - _phantom: PhantomData, -} - -unsafe impl Send for FreeArea {} -unsafe impl Sync for FreeArea {} - -impl FreeArea -where - Raw: BuddyRawPage, -{ - pub const fn new() -> Self { - Self { - free_list: Link::new(), - count: 0, - _phantom: PhantomData, - } - } - - pub fn get_free_pages(&mut self) -> Option { - self.free_list.next_mut().map(|pages_link| { - assert_ne!(self.count, 0); - - let pages_ptr = unsafe { - // SAFETY: Items in `self.free_list` are guaranteed to be of type `Raw`. 
- Raw::from_link(pages_link) - }; - - self.count -= 1; - pages_link.remove(); - - pages_ptr - }) - } - - pub fn add_pages(&mut self, pages_ptr: Raw) { - self.count += 1; - pages_ptr.set_free(); - - unsafe { - self.free_list.insert(pages_ptr.get_link()); - } - } - - pub fn del_pages(&mut self, pages_ptr: Raw) { - assert!(self.count >= 1 && pages_ptr.is_free()); - self.count -= 1; - pages_ptr.clear_free(); - unsafe { - pages_ptr.get_link().remove(); - } - } -} diff --git a/crates/buddy_allocator/src/lib.rs b/crates/buddy_allocator/src/lib.rs index f8c8eeda..abe1ef7b 100644 --- a/crates/buddy_allocator/src/lib.rs +++ b/crates/buddy_allocator/src/lib.rs @@ -1,87 +1,250 @@ #![no_std] -mod free_area; -mod zone; +use core::hint::unreachable_unchecked; -use core::sync::atomic::Ordering; -use eonix_mm::{ - address::PAddr, - paging::{RawPage, PFN}, -}; -use intrusive_list::Link; -use zone::Zone; +use eonix_mm::address::{AddrOps as _, PAddr, PRange}; +use eonix_mm::paging::{PageList, PageListSized, Zone, PFN}; const MAX_ORDER: u32 = 10; -const ZONE_AREAS: usize = const { MAX_ORDER as usize + 1 }; +const AREAS: usize = const { MAX_ORDER as usize + 1 }; -pub trait BuddyRawPage: RawPage { - /// Get the container raw page struct of the list link. - /// - /// # Safety - /// The caller MUST ensure that the link points to a `RawPage`. - unsafe fn from_link(link: &mut Link) -> Self; - - /// Get the list link of the raw page. - /// - /// # Safety - /// The caller MUST ensure that at any time, only one mutable reference - /// to the link exists. - unsafe fn get_link(&self) -> &mut Link; - - fn set_order(&self, order: u32); +pub trait BuddyPage: Sized + 'static { + fn pfn(&self) -> PFN; + fn get_order(&self) -> u32; fn is_buddy(&self) -> bool; - fn is_free(&self) -> bool; - fn set_buddy(&self); - fn set_free(&self); + fn set_order(&mut self, order: u32); + fn set_buddy(&mut self, value: bool); +} - fn clear_buddy(&self); - fn clear_free(&self); +struct FreeArea +where + L: PageList, +{ + free_list: L, + count: usize, } -pub struct BuddyAllocator +unsafe impl Send for FreeArea where L: PageList {} +unsafe impl Sync for FreeArea where L: PageList {} + +pub struct BuddyAllocator where - T: BuddyRawPage, + Z: Zone + 'static, + L: PageList, { - zone: Zone, + zone: &'static Z, + free_areas: [FreeArea; AREAS], } -impl BuddyAllocator +impl BuddyAllocator where - T: BuddyRawPage, + Z: Zone + 'static, + Z::Page: BuddyPage, + L: PageListSized, { - pub const fn new() -> Self { - Self { zone: Zone::new() } + pub const fn new(zone: &'static Z) -> Self { + Self { + zone, + free_areas: [const { FreeArea::new() }; AREAS], + } } +} +impl BuddyAllocator +where + Z: Zone, + L: PageList, + P: BuddyPage + 'static, +{ pub fn create_pages(&mut self, start: PAddr, end: PAddr) { - self.zone.create_pages(start, end); + assert!( + self.zone + .contains_prange(PRange::new(start.ceil(), end.floor())), + "The given address range is not within the zone." + ); + + let mut pfn = PFN::from(start.ceil()); + let end_pfn = PFN::from(end.floor()); + + while pfn < end_pfn { + let mut order = usize::from(pfn).trailing_zeros().min(MAX_ORDER); + let new_end_pfn = loop { + let new_end = pfn + (1 << order); + + if new_end <= end_pfn { + break new_end; + } + + order -= 1; + }; + + unsafe { + // SAFETY: We've checked that the range is within the zone above. 
+ self.add_page_unchecked(pfn, order) + }; + + pfn = new_end_pfn; + } + } + + fn add_page(&mut self, pfn: PFN, order: u32) { + let prange = PRange::from(PAddr::from(pfn)).grow(1 << (order + 12)); + assert!( + self.zone.contains_prange(prange), + "The given page is not within the zone." + ); + + unsafe { + // SAFETY: Checks above. + self.add_page_unchecked(pfn, order); + } + } + + unsafe fn add_page_unchecked(&mut self, pfn: PFN, order: u32) { + let Some(page) = self.zone.get_page(pfn) else { + unsafe { unreachable_unchecked() } + }; + + unsafe { + // SAFETY: The caller ensures that the page is unused. + let page_mut = &mut *page.get(); + self.free_areas[order as usize].add_page(page_mut, order); + } + } + + fn break_page(&mut self, page: &mut P, order: u32, target_order: u32) { + let pfn = page.pfn(); + + for order in (target_order..order).rev() { + let buddy_pfn = pfn + (1 << order); + + unsafe { + // SAFETY: We got the page from `self.free_areas`. Checks are + // done when we've put the page into the buddy system. + self.add_page_unchecked(buddy_pfn, order); + } + } + + page.set_order(target_order); + } + + pub fn alloc_order(&mut self, order: u32) -> Option<&'static mut Z::Page> { + for current_order in order..AREAS as u32 { + let Some(page) = self.free_areas[current_order as usize].get_free_page() else { + continue; + }; + + if current_order > order { + self.break_page(page, current_order, order); + } + + return Some(page); + } + + None + } + + pub unsafe fn dealloc(&mut self, page: &'static mut Z::Page) { + let mut pfn = page.pfn(); + let mut order = page.get_order(); + + assert!( + !page.is_buddy(), + "Trying to free a page that is already in the buddy system: {pfn:?}", + ); + + while order < MAX_ORDER { + let buddy_pfn = pfn.buddy_pfn(order); + let Some(buddy_page) = self.try_get_buddy(buddy_pfn, order) else { + break; + }; + + self.free_areas[order as usize].remove_page(buddy_page); + pfn = pfn.combined_pfn(buddy_pfn); + order += 1; + } + + self.add_page(pfn, order); } - pub fn alloc_order(&mut self, order: u32) -> Option { - let pages_ptr = self.zone.get_free_pages(order); + /// This function checks whether the given page is within our [`Zone`] and + /// is a free buddy page with the specified order. + /// + /// We can assure exclusive access to a buddy page of [`order`] if + /// - the buddy is within the same [`Zone`] as us. + /// - the buddy is a free buddy (in some [`FreeArea`]) + /// - the buddy has order [`order`] + fn try_get_buddy<'a>(&mut self, buddy_pfn: PFN, order: u32) -> Option<&'a mut P> { + let buddy_page = self.zone.get_page(buddy_pfn)?; + + unsafe { + // SAFETY: We just test whether the page is a buddy. + let buddy_page_ref = &*buddy_page.get(); - if let Some(pages_ptr) = pages_ptr { - // SAFETY: Memory order here can be Relaxed is for the same reason as that - // in the copy constructor of `std::shared_ptr`. - pages_ptr.refcount().fetch_add(1, Ordering::Relaxed); - pages_ptr.clear_free(); + if !buddy_page_ref.is_buddy() { + return None; + } + + // Sad... + if buddy_page_ref.get_order() != order { + return None; + } + + // SAFETY: We have the mutable reference to the buddy allocator. + // So all the pages within are exclusively accessible to us. 
+ Some(&mut *buddy_page.get()) + } + } +} + +impl FreeArea +where + L: PageListSized, +{ + const fn new() -> Self { + Self { + free_list: L::NEW, + count: 0, } + } +} + +impl FreeArea +where + L: PageList, + L::Page: BuddyPage + 'static, +{ + pub fn get_free_page(&mut self) -> Option<&'static mut L::Page> { + self.free_list.pop_head().map(|page| { + assert_ne!(self.count, 0, "Oops"); + + page.set_buddy(false); + self.count -= 1; - pages_ptr + page + }) } - pub unsafe fn dealloc(&mut self, page_ptr: T) { - self.zone.free_pages(page_ptr); + pub fn add_page(&mut self, page: &'static mut L::Page, order: u32) { + page.set_order(order); + page.set_buddy(true); + + self.count += 1; + self.free_list.push_tail(page); } - pub fn has_management_over(page_ptr: T) -> bool { - !page_ptr.is_free() && page_ptr.is_buddy() + pub fn remove_page(&mut self, page: &mut L::Page) { + assert_ne!(self.count, 0, "Oops"); + page.set_buddy(false); + + self.count -= 1; + self.free_list.remove(page); } } -pub(self) trait BuddyPFNOps { +trait BuddyPFNOps { fn buddy_pfn(self, order: u32) -> PFN; fn combined_pfn(self, buddy_pfn: PFN) -> PFN; } diff --git a/crates/buddy_allocator/src/zone.rs b/crates/buddy_allocator/src/zone.rs deleted file mode 100644 index 2c850ef9..00000000 --- a/crates/buddy_allocator/src/zone.rs +++ /dev/null @@ -1,146 +0,0 @@ -use super::free_area::FreeArea; -use crate::{BuddyPFNOps as _, BuddyRawPage}; -use core::sync::atomic::Ordering; -use eonix_mm::{ - address::{AddrOps as _, PAddr}, - paging::PFN, -}; - -pub(super) struct Zone { - free_areas: [FreeArea; AREAS], -} - -impl Zone -where - Raw: BuddyRawPage, -{ - pub const fn new() -> Self { - Self { - free_areas: [const { FreeArea::new() }; AREAS], - } - } - - pub fn get_free_pages(&mut self, order: u32) -> Option { - for current_order in order..AREAS as u32 { - let pages_ptr = self.free_areas[current_order as usize].get_free_pages(); - let Some(pages_ptr) = pages_ptr else { continue }; - - pages_ptr.set_order(order); - - if current_order > order { - self.expand(pages_ptr, current_order, order); - } - - assert!( - pages_ptr.is_present(), - "Page {:?} is not present", - pages_ptr.into(), - ); - - assert!( - pages_ptr.is_free(), - "Page {:?} is not free", - pages_ptr.into(), - ); - - return Some(pages_ptr); - } - None - } - - fn expand(&mut self, pages_ptr: Raw, order: u32, target_order: u32) { - let mut offset = 1 << order; - let pages_pfn = Into::::into(pages_ptr); - - for order in (target_order..order).rev() { - offset >>= 1; - - let split_pages_ptr = Raw::from(pages_pfn + offset); - split_pages_ptr.set_order(order); - split_pages_ptr.set_buddy(); - self.free_areas[order as usize].add_pages(split_pages_ptr); - } - } - - pub fn free_pages(&mut self, mut pages_ptr: Raw) { - assert_eq!(pages_ptr.refcount().load(Ordering::Relaxed), 0); - - let mut pfn = Into::::into(pages_ptr); - let mut current_order = pages_ptr.order(); - - assert!( - pages_ptr.is_present(), - "Freeing a page that is not present: {:?}", - pages_ptr.into(), - ); - - assert!( - !pages_ptr.is_free(), - "Freeing a page that is free: {:?}", - pages_ptr.into(), - ); - - while current_order < (AREAS - 1) as u32 { - let buddy_pfn = pfn.buddy_pfn(current_order); - let buddy_pages_ptr = Raw::from(buddy_pfn); - - if !self.buddy_check(buddy_pages_ptr, current_order) { - break; - } - - pages_ptr.clear_buddy(); - buddy_pages_ptr.clear_buddy(); - self.free_areas[current_order as usize].del_pages(buddy_pages_ptr); - - pages_ptr = Raw::from(pfn.combined_pfn(buddy_pfn)); - pfn = 
pfn.combined_pfn(buddy_pfn); - - pages_ptr.set_buddy(); - current_order += 1; - } - - pages_ptr.set_order(current_order); - self.free_areas[current_order as usize].add_pages(pages_ptr); - } - - /// This function checks whether a page is free && is a buddy - /// we can coalesce a page and its buddy if - /// - the buddy is valid(present) && - /// - the buddy is right now in free_areas && - /// - a page and its buddy have the same order && - /// - a page and its buddy are in the same zone (on smp systems). - fn buddy_check(&self, pages_ptr: Raw, order: u32) -> bool { - if !pages_ptr.is_present() { - return false; - } - if !pages_ptr.is_free() { - return false; - } - if pages_ptr.order() != order { - return false; - } - - assert_eq!(pages_ptr.refcount().load(Ordering::Relaxed), 0); - true - } - - /// Only used on buddy initialization - pub fn create_pages(&mut self, start: PAddr, end: PAddr) { - let mut start_pfn = PFN::from(start.ceil()); - let end_pfn = PFN::from(end.floor()); - - while start_pfn < end_pfn { - let mut order = usize::from(start_pfn) - .trailing_zeros() - .min((AREAS - 1) as u32); - - while start_pfn + (1 << order) as usize > end_pfn { - order -= 1; - } - let page_ptr = Raw::from(start_pfn); - page_ptr.set_buddy(); - self.free_areas[order as usize].add_pages(page_ptr); - start_pfn = start_pfn + (1 << order) as usize; - } - } -} diff --git a/crates/eonix_mm/src/paging.rs b/crates/eonix_mm/src/paging.rs index 88da902e..0c4811f2 100644 --- a/crates/eonix_mm/src/paging.rs +++ b/crates/eonix_mm/src/paging.rs @@ -1,9 +1,13 @@ +mod list; mod page; mod page_alloc; mod pfn; mod raw_page; +mod zone; +pub use list::{PageList, PageListSized}; pub use page::{Page, PageAccess, PageBlock, PAGE_SIZE, PAGE_SIZE_BITS}; pub use page_alloc::{GlobalPageAlloc, NoAlloc, PageAlloc}; pub use pfn::PFN; pub use raw_page::{RawPage, UnmanagedRawPage}; +pub use zone::Zone; diff --git a/crates/eonix_mm/src/paging/list.rs b/crates/eonix_mm/src/paging/list.rs new file mode 100644 index 00000000..a52cf947 --- /dev/null +++ b/crates/eonix_mm/src/paging/list.rs @@ -0,0 +1,19 @@ +pub trait PageList { + type Page; + + fn is_empty(&self) -> bool; + + fn peek_head(&mut self) -> Option<&mut Self::Page>; + + fn pop_head(&mut self) -> Option<&'static mut Self::Page>; + fn push_tail(&mut self, page: &'static mut Self::Page); + fn remove(&mut self, page: &mut Self::Page); +} + +pub trait PageListSized: PageList + Sized { + const NEW: Self; + + fn new() -> Self { + Self::NEW + } +} diff --git a/crates/eonix_mm/src/paging/raw_page.rs b/crates/eonix_mm/src/paging/raw_page.rs index 7951729d..789e863b 100644 --- a/crates/eonix_mm/src/paging/raw_page.rs +++ b/crates/eonix_mm/src/paging/raw_page.rs @@ -1,14 +1,13 @@ -use super::PFN; use core::sync::atomic::AtomicUsize; +use super::PFN; + /// A `RawPage` represents a page of memory in the kernel. It is a low-level /// representation of a page that is used by the kernel to manage memory. 
#[doc(notable_trait)] pub trait RawPage: Clone + Copy + From + Into { fn order(&self) -> u32; fn refcount(&self) -> &AtomicUsize; - - fn is_present(&self) -> bool; } #[derive(Clone, Copy)] @@ -45,8 +44,4 @@ impl RawPage for UnmanagedRawPage { fn refcount(&self) -> &AtomicUsize { &UNMANAGED_RAW_PAGE_CLONE_COUNT } - - fn is_present(&self) -> bool { - true - } } diff --git a/crates/eonix_mm/src/paging/zone.rs b/crates/eonix_mm/src/paging/zone.rs new file mode 100644 index 00000000..ec3ed15e --- /dev/null +++ b/crates/eonix_mm/src/paging/zone.rs @@ -0,0 +1,20 @@ +use core::cell::UnsafeCell; + +#[allow(unused_imports)] +use super::{Page, PageAlloc, RawPage, PFN}; +use crate::address::PRange; + +/// A [`Zone`] holds a lot of [`Page`]s that share the same NUMA node or +/// "physical location". +pub trait Zone: Send + Sync { + type Page; + + /// Whether the [`range`] is within this [`Zone`]. + fn contains_prange(&self, range: PRange) -> bool; + + /// Get the [`RawPage`] that [`pfn`] points to. + /// + /// # Return + /// [`None`] if [`pfn`] is not in this [`Zone`]. + fn get_page(&self, pfn: PFN) -> Option<&UnsafeCell>; +} diff --git a/crates/slab_allocator/src/lib.rs b/crates/slab_allocator/src/lib.rs index 8a684edd..8597331d 100644 --- a/crates/slab_allocator/src/lib.rs +++ b/crates/slab_allocator/src/lib.rs @@ -2,6 +2,7 @@ use core::ptr::NonNull; +use eonix_mm::paging::{PageList, PageListSized}; use eonix_sync::Spin; #[repr(C)] @@ -10,19 +11,6 @@ pub union SlabSlot { data: u8, } -pub trait SlabPageList: Sized { - type Page: SlabPage; - - fn new() -> Self; - fn is_empty(&self) -> bool; - - fn peek_head(&mut self) -> Option<&mut Self::Page>; - - fn pop_head(&mut self) -> Option<&'static mut Self::Page>; - fn push_tail(&mut self, page: &'static mut Self::Page); - fn remove(&mut self, page: &mut Self::Page); -} - pub trait SlabPage: Sized + 'static { fn get_data_ptr(&self) -> NonNull<[u8]>; @@ -98,7 +86,7 @@ where pub trait SlabPageAlloc { type Page: SlabPage; - type PageList: SlabPageList; + type PageList: PageList; /// Allocate a page suitable for slab system use. The page MUST come with /// its allocation count 0 and next free slot None. 
@@ -110,7 +98,7 @@ pub trait SlabPageAlloc { pub(crate) struct SlabList where - T: SlabPageList, + T: PageList, { empty_list: T, partial_list: T, @@ -132,6 +120,7 @@ unsafe impl Sync for SlabAlloc where P: SlabPag impl SlabAlloc where L: SlabPageAlloc, + L::PageList: PageListSized, { pub fn new_in(alloc: L) -> Self { Self { @@ -159,17 +148,23 @@ where impl SlabList where - T: SlabPageList, + T: PageListSized, { - fn new(object_size: usize) -> Self { + const fn new(object_size: usize) -> Self { Self { - empty_list: T::new(), - partial_list: T::new(), - full_list: T::new(), + empty_list: T::NEW, + partial_list: T::NEW, + full_list: T::NEW, object_size, } } +} +impl SlabList +where + T: PageList, + T::Page: SlabPage, +{ fn alloc_from_partial(&mut self) -> NonNull { let head = self.partial_list.peek_head().unwrap(); let slot = head.alloc_slot().unwrap(); diff --git a/src/kernel/mem/page_alloc.rs b/src/kernel/mem/page_alloc.rs index 1c018f37..9dce4567 100644 --- a/src/kernel/mem/page_alloc.rs +++ b/src/kernel/mem/page_alloc.rs @@ -1,19 +1,25 @@ mod raw_page; +mod zones; use core::sync::atomic::Ordering; -use buddy_allocator::{BuddyAllocator, BuddyRawPage as _}; +use buddy_allocator::BuddyAllocator; use eonix_mm::address::{AddrOps as _, PRange}; -use eonix_mm::paging::{GlobalPageAlloc as GlobalPageAllocTrait, PageAlloc, PFN}; +use eonix_mm::paging::{ + GlobalPageAlloc as GlobalPageAllocTrait, PageAlloc, PageList, PageListSized as _, PFN, +}; +use eonix_preempt::PreemptGuard; use eonix_sync::{NoContext, Spin}; -use intrusive_list::List; -use raw_page::PageFlags; +use raw_page::{PageFlags, RawPageList}; pub use raw_page::{RawPage, RawPagePtr}; +pub use zones::GlobalZone; const COSTLY_ORDER: u32 = 3; +const AREAS: usize = COSTLY_ORDER as usize + 1; const BATCH_SIZE: u32 = 64; -static BUDDY_ALLOC: Spin> = Spin::new(BuddyAllocator::new()); +static BUDDY_ALLOC: Spin> = + Spin::new(BuddyAllocator::new(&GlobalZone())); #[eonix_percpu::define_percpu] static PERCPU_PAGE_ALLOC: PerCpuPageAlloc = PerCpuPageAlloc::new(); @@ -26,58 +32,42 @@ pub struct BuddyPageAlloc(); struct PerCpuPageAlloc { batch: u32, - // TODO: might be used in the future. - // high: u32, - free_areas: [List; COSTLY_ORDER as usize + 1], + free_areas: [RawPageList; AREAS], +} + +pub trait PerCpuPage { + fn set_local(&mut self, val: bool); } impl PerCpuPageAlloc { const fn new() -> Self { Self { batch: BATCH_SIZE, - // high: 0, - free_areas: [const { List::new() }; COSTLY_ORDER as usize + 1], + free_areas: [RawPageList::NEW; AREAS], } } - fn insert_free_pages(&mut self, pages_ptr: RawPagePtr, order: u32) { - let free_area = &mut self.free_areas[order as usize]; - free_area.insert(unsafe { pages_ptr.get_link() }); - } - - fn get_free_pages(&mut self, order: u32) -> Option { - let free_area = &mut self.free_areas[order as usize]; - free_area.pop().map(|node| unsafe { - // SAFETY: `node` is a valid pointer to a `Link` that is not used by anyone. 
- RawPagePtr::from_link(node) - }) - } - - fn alloc_order(&mut self, order: u32) -> Option { + fn alloc_order(&mut self, order: u32) -> Option<&'static mut RawPage> { assert!(order <= COSTLY_ORDER); - if let Some(pages) = self.get_free_pages(order) { + if let Some(pages) = self.free_areas[order as usize].pop_head() { return Some(pages); } let batch = self.batch >> order; for _ in 0..batch { - if let Some(pages_ptr) = BUDDY_ALLOC.lock().alloc_order(order) { - pages_ptr.flags().set(PageFlags::LOCAL); - self.insert_free_pages(pages_ptr, order); - } else { + let Some(page) = BUDDY_ALLOC.lock().alloc_order(order) else { break; }; + + page.set_local(true); + self.free_areas[order as usize].push_tail(page); } - self.get_free_pages(order) + self.free_areas[order as usize].pop_head() } - fn free_pages(&mut self, pages_ptr: RawPagePtr, order: u32) { - assert_eq!(pages_ptr.order(), order); - assert_eq!(pages_ptr.refcount().load(Ordering::Relaxed), 0); - - pages_ptr.refcount().store(1, Ordering::Relaxed); - self.insert_free_pages(pages_ptr, order); + fn free_pages(&mut self, page: &'static mut RawPage, order: u32) { + self.free_areas[order as usize].push_tail(page); } } @@ -87,16 +77,6 @@ impl GlobalPageAlloc { BuddyPageAlloc() } - pub fn mark_present(range: PRange) { - let mut pfn = PFN::from(range.start().ceil()); - let end_pfn = PFN::from(range.end().floor()); - - while pfn < end_pfn { - RawPagePtr::from(pfn).flags().set(PageFlags::PRESENT); - pfn = pfn + 1; - } - } - /// Add the pages in the PAddr range `range` to the global allocator. /// /// This function is only to be called on system initialization when `eonix_preempt` @@ -116,34 +96,47 @@ impl PageAlloc for GlobalPageAlloc { type RawPage = RawPagePtr; fn alloc_order(&self, order: u32) -> Option { - if order > COSTLY_ORDER { + let raw_page = if order > COSTLY_ORDER { BUDDY_ALLOC.lock().alloc_order(order) } else { unsafe { eonix_preempt::disable(); - let page_ptr = PERCPU_PAGE_ALLOC.as_mut().alloc_order(order); + let page = PERCPU_PAGE_ALLOC.as_mut().alloc_order(order); eonix_preempt::enable(); - page_ptr + + page } - } + }; + + raw_page.map(|raw_page| { + // SAFETY: Memory order here can be Relaxed is for the same reason + // as that in the copy constructor of `std::shared_ptr`. 
+ raw_page.refcount.fetch_add(1, Ordering::Relaxed); + + RawPagePtr::from_ref(raw_page) + }) } unsafe fn dealloc(&self, page_ptr: RawPagePtr) { + assert_eq!( + page_ptr.refcount().load(Ordering::Relaxed), + 0, + "Trying to free a page with refcount > 0" + ); + if page_ptr.order() > COSTLY_ORDER { - BUDDY_ALLOC.lock().dealloc(page_ptr); + BUDDY_ALLOC.lock().dealloc(page_ptr.as_mut()); } else { let order = page_ptr.order(); + unsafe { - eonix_preempt::disable(); - PERCPU_PAGE_ALLOC.as_mut().free_pages(page_ptr, order); - eonix_preempt::enable(); + PreemptGuard::new(PERCPU_PAGE_ALLOC.as_mut()).free_pages(page_ptr.as_mut(), order); } } } fn has_management_over(&self, page_ptr: RawPagePtr) -> bool { - BuddyAllocator::has_management_over(page_ptr) - && (page_ptr.order() > COSTLY_ORDER || page_ptr.flags().has(PageFlags::LOCAL)) + page_ptr.order() > COSTLY_ORDER || page_ptr.flags().has(PageFlags::LOCAL) } } @@ -157,14 +150,17 @@ impl PageAlloc for BuddyPageAlloc { type RawPage = RawPagePtr; fn alloc_order(&self, order: u32) -> Option { - BUDDY_ALLOC.lock().alloc_order(order) + BUDDY_ALLOC + .lock() + .alloc_order(order) + .map(|raw_page| RawPagePtr::from_ref(raw_page)) } unsafe fn dealloc(&self, page_ptr: RawPagePtr) { - BUDDY_ALLOC.lock().dealloc(page_ptr); + BUDDY_ALLOC.lock().dealloc(page_ptr.as_mut()); } - fn has_management_over(&self, page_ptr: RawPagePtr) -> bool { - BuddyAllocator::has_management_over(page_ptr) + fn has_management_over(&self, _: RawPagePtr) -> bool { + true } } diff --git a/src/kernel/mem/page_alloc/raw_page.rs b/src/kernel/mem/page_alloc/raw_page.rs index d793ccd7..0baa7b9a 100644 --- a/src/kernel/mem/page_alloc/raw_page.rs +++ b/src/kernel/mem/page_alloc/raw_page.rs @@ -1,18 +1,18 @@ use core::ptr::NonNull; use core::sync::atomic::{AtomicU32, AtomicUsize, Ordering}; -use buddy_allocator::BuddyRawPage; +use buddy_allocator::BuddyPage; use eonix_hal::mm::ArchPhysAccess; use eonix_mm::address::{PAddr, PhysAccess as _}; -use eonix_mm::paging::{PageAlloc, RawPage as RawPageTrait, PFN}; +use eonix_mm::paging::{PageAlloc, PageList, PageListSized, RawPage as RawPageTrait, PFN}; use intrusive_list::{container_of, Link, List}; -use slab_allocator::{SlabPage, SlabPageAlloc, SlabPageList, SlabSlot}; +use slab_allocator::{SlabPage, SlabPageAlloc, SlabSlot}; -use super::GlobalPageAlloc; +use super::{GlobalPageAlloc, PerCpuPage}; use crate::kernel::mem::page_cache::PageCacheRawPage; use crate::kernel::mem::PhysAccess; -const PAGE_ARRAY: NonNull = +pub const PAGE_ARRAY: NonNull = unsafe { NonNull::new_unchecked(0xffffff8040000000 as *mut _) }; pub struct PageFlags(AtomicU32); @@ -52,21 +52,23 @@ pub struct RawPage { /// This field is only used in buddy system and is protected by the global lock. order: u32, flags: PageFlags, - refcount: AtomicUsize, + pub refcount: AtomicUsize, shared_data: PageData, } +// XXX: introduce Folio and remove this. 
+unsafe impl Send for RawPage {} +unsafe impl Sync for RawPage {} + #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] pub struct RawPagePtr(NonNull); impl PageFlags { - pub const PRESENT: u32 = 1 << 0; pub const LOCKED: u32 = 1 << 1; pub const BUDDY: u32 = 1 << 2; pub const SLAB: u32 = 1 << 3; pub const DIRTY: u32 = 1 << 4; - pub const FREE: u32 = 1 << 5; pub const LOCAL: u32 = 1 << 6; pub fn has(&self, flag: u32) -> bool { @@ -158,48 +160,31 @@ impl RawPageTrait for RawPagePtr { fn refcount(&self) -> &AtomicUsize { self.refcount() } - - fn is_present(&self) -> bool { - self.flags().has(PageFlags::PRESENT) - } } -impl BuddyRawPage for RawPagePtr { - unsafe fn from_link(link: &mut Link) -> Self { - let raw_page_ptr = container_of!(link, RawPage, link); - Self(raw_page_ptr) - } - - fn set_order(&self, order: u32) { - self.as_mut().order = order; +impl BuddyPage for RawPage { + fn pfn(&self) -> PFN { + PFN::from(RawPagePtr::from_ref(self)) } - unsafe fn get_link(&self) -> &mut Link { - &mut self.as_mut().link + fn get_order(&self) -> u32 { + self.order } fn is_buddy(&self) -> bool { - self.flags().has(PageFlags::BUDDY) - } - - fn is_free(&self) -> bool { - self.flags().has(PageFlags::FREE) - } - - fn set_buddy(&self) { - self.flags().set(PageFlags::BUDDY); - } - - fn set_free(&self) { - self.flags().set(PageFlags::FREE); + self.flags.has(PageFlags::BUDDY) } - fn clear_buddy(&self) { - self.flags().clear(PageFlags::BUDDY); + fn set_order(&mut self, order: u32) { + self.order = order; } - fn clear_free(&self) { - self.flags().clear(PageFlags::FREE); + fn set_buddy(&mut self, val: bool) { + if val { + self.flags.set(PageFlags::BUDDY); + } else { + self.flags.clear(PageFlags::BUDDY) + } } } @@ -284,15 +269,21 @@ impl PageCacheRawPage for RawPagePtr { } } -pub struct RawSlabPageList(List); +impl PerCpuPage for RawPage { + fn set_local(&mut self, val: bool) { + if val { + self.flags.set(PageFlags::LOCAL) + } else { + self.flags.clear(PageFlags::LOCAL) + } + } +} + +pub struct RawPageList(List); -impl SlabPageList for RawSlabPageList { +impl PageList for RawPageList { type Page = RawPage; - fn new() -> Self { - Self(List::new()) - } - fn is_empty(&self) -> bool { self.0.is_empty() } @@ -324,9 +315,13 @@ impl SlabPageList for RawSlabPageList { } } +impl PageListSized for RawPageList { + const NEW: Self = RawPageList(List::new()); +} + impl SlabPageAlloc for GlobalPageAlloc { type Page = RawPage; - type PageList = RawSlabPageList; + type PageList = RawPageList; unsafe fn alloc_uninit(&self) -> &'static mut RawPage { let raw_page = self.alloc().expect("Out of memory").as_mut(); diff --git a/src/kernel/mem/page_alloc/zones.rs b/src/kernel/mem/page_alloc/zones.rs new file mode 100644 index 00000000..7a2e4e33 --- /dev/null +++ b/src/kernel/mem/page_alloc/zones.rs @@ -0,0 +1,25 @@ +use core::cell::UnsafeCell; + +use eonix_mm::address::PRange; +use eonix_mm::paging::{Zone, PFN}; + +use super::RawPage; +use crate::kernel::mem::page_alloc::RawPagePtr; + +pub struct GlobalZone(); + +impl Zone for GlobalZone { + type Page = RawPage; + + fn contains_prange(&self, _: PRange) -> bool { + true + } + + fn get_page(&self, pfn: PFN) -> Option<&UnsafeCell> { + unsafe { + // SAFETY: The pointer returned by [`RawPagePtr::as_ptr()`] is valid. 
+            // And so is it when wrapped in an [`UnsafeCell`].
+            Some(&*(RawPagePtr::from(pfn).as_ptr() as *const UnsafeCell<RawPage>))
+        }
+    }
+}
diff --git a/src/kernel_init.rs b/src/kernel_init.rs
index 3d8be90f..93b6da20 100644
--- a/src/kernel_init.rs
+++ b/src/kernel_init.rs
@@ -1,14 +1,11 @@
+use eonix_hal::bootstrap::BootStrapData;
+use eonix_hal::mm::{ArchMemory, ArchPagingMode, GLOBAL_PAGE_TABLE};
+use eonix_hal::traits::mm::Memory;
+use eonix_mm::address::{Addr as _, AddrOps as _, VAddr, VRange};
+use eonix_mm::page_table::{PageAttribute, PagingMode as _, PTE};
+use eonix_mm::paging::{Page as GenericPage, PAGE_SIZE, PFN};
+
 use crate::kernel::mem::{GlobalPageAlloc, RawPage};
-use eonix_hal::{
-    bootstrap::BootStrapData,
-    mm::{ArchMemory, ArchPagingMode, GLOBAL_PAGE_TABLE},
-    traits::mm::Memory,
-};
-use eonix_mm::{
-    address::{Addr as _, AddrOps as _, VAddr, VRange},
-    page_table::{PageAttribute, PagingMode as _, PTE},
-    paging::{Page as GenericPage, PAGE_SIZE, PFN},
-};
 
 pub fn setup_memory(data: &mut BootStrapData) {
     let addr_max = ArchMemory::present_ram()
@@ -50,9 +47,10 @@ pub fn setup_memory(data: &mut BootStrapData) {
         );
     }
 
-    for range in ArchMemory::present_ram() {
-        GlobalPageAlloc::mark_present(range);
-    }
+    // TODO!!!: Construct the global zone with all present ram.
+    // for range in ArchMemory::present_ram() {
+    //     GlobalPageAlloc::mark_present(range);
+    // }
 
     if let Some(early_alloc) = data.take_alloc() {
         for range in early_alloc.into_iter() {

From 45268e7d335e5403b7ea868379b5491bb5d0f26d Mon Sep 17 00:00:00 2001
From: greatbridf
Date: Wed, 14 Jan 2026 01:09:49 +0800
Subject: [PATCH 09/25] vfs: rework of inode and page cache system

- Bump the Rust compiler version to nightly-2026-01-09.
- Inode rework: add a generic Inode struct.
- Add a helper macro for function tweaks.
- PageCache rework: reduce complexity and improve decoupling.
- Adapt fat32 and tmpfs to the new page cache system.
- Change the way we process mapped pages and load ELF executables.
- Refine the handling of flags in `MMArea::handle_mmap`.
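For the filesystems, the visible change is that per-fs inode types no longer
store `ino`, `info` or a superblock reference themselves; the generic Inode
struct does, and every op receives the superblock and inode handles
explicitly. A condensed sketch taken from the fat32 adaptation below (the
generic parameter on `SbUse` and the `KResult<usize>` return type are
inferred, as the trait definition itself is outside this diff):

    impl InodeOps for FileInode {
        type SuperBlock = FatFs;

        async fn read(
            &self,
            _: SbUse<Self::SuperBlock>,
            inode: &InodeUse,
            buffer: &mut dyn Buffer,
            offset: usize,
        ) -> KResult<usize> {
            // The page cache now hangs off the generic inode,
            // not the filesystem-private type.
            inode.get_page_cache().read(buffer, offset).await
        }
    }

Filesystem-private state is recovered where needed via
`inode.get_priv::<DirectoryInode>()`, as the tmpfs rename path does below.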
Signed-off-by: greatbridf --- .../eonix_hal/src/arch/riscv64/bootstrap.rs | 62 +- rust-toolchain | 2 +- src/fs/fat32.rs | 276 ++++----- src/fs/procfs.rs | 155 +++-- src/fs/tmpfs/dir.rs | 287 +++++----- src/fs/tmpfs/file.rs | 331 +++++------ src/fs/tmpfs/mod.rs | 23 +- src/kernel/mem.rs | 2 +- src/kernel/mem/mm_area.rs | 97 ++-- src/kernel/mem/mm_list/mapping.rs | 15 +- src/kernel/mem/mm_list/page_fault.rs | 5 +- src/kernel/mem/page_alloc.rs | 4 +- src/kernel/mem/page_alloc/raw_page.rs | 29 +- src/kernel/mem/page_cache.rs | 416 +++++--------- src/kernel/syscall/mm.rs | 21 +- src/kernel/task/loader/elf.rs | 70 +-- src/kernel/vfs/dentry.rs | 66 +-- src/kernel/vfs/dentry/walk.rs | 52 +- src/kernel/vfs/file/inode_file.rs | 36 +- src/kernel/vfs/filearray.rs | 46 +- src/kernel/vfs/inode/inode.rs | 531 +++++++++--------- src/kernel/vfs/inode/mod.rs | 2 +- src/kernel/vfs/inode/ops.rs | 5 +- src/kernel/vfs/inode/statx.rs | 32 +- src/kernel/vfs/mount.rs | 28 +- src/kernel/vfs/superblock.rs | 45 +- src/lib.rs | 1 - 27 files changed, 1152 insertions(+), 1487 deletions(-) diff --git a/crates/eonix_hal/src/arch/riscv64/bootstrap.rs b/crates/eonix_hal/src/arch/riscv64/bootstrap.rs index 0f1dff63..7b3dc043 100644 --- a/crates/eonix_hal/src/arch/riscv64/bootstrap.rs +++ b/crates/eonix_hal/src/arch/riscv64/bootstrap.rs @@ -1,39 +1,31 @@ -use super::{ - config::{self, mm::*}, - console::write_str, - cpu::{CPUID, CPU_COUNT}, - time::set_next_timer, -}; -use crate::{ - arch::{ - cpu::CPU, - fdt::{init_dtb_and_fdt, FdtExt, FDT}, - mm::{ArchPhysAccess, FreeRam, PageAttribute64, GLOBAL_PAGE_TABLE}, - }, - bootstrap::BootStrapData, - mm::{ArchMemory, ArchPagingMode, BasicPageAlloc, BasicPageAllocRef, ScopedAllocator}, -}; -use core::{ - alloc::Allocator, - arch::asm, - cell::RefCell, - sync::atomic::{AtomicBool, AtomicUsize}, -}; -use core::{ - arch::{global_asm, naked_asm}, - hint::spin_loop, - sync::atomic::{AtomicPtr, Ordering}, -}; +use core::alloc::Allocator; +use core::arch::{asm, global_asm, naked_asm}; +use core::cell::RefCell; +use core::hint::spin_loop; +use core::sync::atomic::{AtomicBool, AtomicPtr, AtomicUsize, Ordering}; + use eonix_hal_traits::mm::Memory; -use eonix_mm::{ - address::{Addr as _, PAddr, PRange, PhysAccess, VAddr, VRange}, - page_table::{PageAttribute, PagingMode, PTE as _}, - paging::{Page, PageAccess, PageAlloc, PAGE_SIZE, PFN}, -}; +use eonix_mm::address::{Addr as _, PAddr, PRange, PhysAccess, VAddr, VRange}; +use eonix_mm::page_table::{PageAttribute, PagingMode, PTE as _}; +use eonix_mm::paging::{Page, PageAccess, PageAlloc, PAGE_SIZE, PFN}; use eonix_percpu::PercpuArea; use fdt::Fdt; -use riscv::{asm::sfence_vma_all, register::satp}; -use sbi::{hsm::hart_start, legacy::console_putchar, PhysicalAddress}; +use riscv::asm::sfence_vma_all; +use riscv::register::satp; +use sbi::hsm::hart_start; +use sbi::legacy::console_putchar; +use sbi::PhysicalAddress; + +use super::config::mm::*; +use super::config::{self}; +use super::console::write_str; +use super::cpu::{CPUID, CPU_COUNT}; +use super::time::set_next_timer; +use crate::arch::cpu::CPU; +use crate::arch::fdt::{init_dtb_and_fdt, FdtExt, FDT}; +use crate::arch::mm::{ArchPhysAccess, FreeRam, PageAttribute64, GLOBAL_PAGE_TABLE}; +use crate::bootstrap::BootStrapData; +use crate::mm::{ArchMemory, ArchPagingMode, BasicPageAlloc, BasicPageAllocRef, ScopedAllocator}; #[unsafe(link_section = ".bootstrap.stack")] static BOOT_STACK: [u8; 4096 * 16] = [0; 4096 * 16]; @@ -78,7 +70,7 @@ static AP_SEM: AtomicBool = AtomicBool::new(false); 
#[unsafe(naked)] #[unsafe(no_mangle)] #[unsafe(link_section = ".bootstrap.entry")] -unsafe extern "C" fn _start(hart_id: usize, dtb_addr: usize) -> ! { +unsafe extern "C" fn _start(hart_id: usize, dtb_addr: usize) { naked_asm!( " ld sp, 2f @@ -289,7 +281,7 @@ fn bootstrap_smp(alloc: impl Allocator, page_alloc: &RefCell) { #[unsafe(naked)] #[unsafe(no_mangle)] #[unsafe(link_section = ".bootstrap.apentry")] -unsafe extern "C" fn _ap_start(hart_id: usize) -> ! { +unsafe extern "C" fn _ap_start(hart_id: usize) { naked_asm!( " la sp, 1f // set temp stack diff --git a/rust-toolchain b/rust-toolchain index 8adb8e58..11ad5efd 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1 +1 @@ -nightly-2025-05-16 +nightly-2026-01-09 diff --git a/src/fs/fat32.rs b/src/fs/fat32.rs index 9a4e03ec..b19c9908 100644 --- a/src/fs/fat32.rs +++ b/src/fs/fat32.rs @@ -1,26 +1,22 @@ mod dir; mod file; -use alloc::sync::{Arc, Weak}; -use core::future::Future; +use alloc::sync::Arc; use core::ops::Deref; use async_trait::async_trait; use dir::{as_raw_dirents, ParseDirent}; +use eonix_mm::paging::PAGE_SIZE; use eonix_sync::RwLock; use itertools::Itertools; use crate::io::{Buffer, ByteBuffer, UninitBuffer}; use crate::kernel::block::{BlockDevice, BlockDeviceRequest}; use crate::kernel::constants::{EINVAL, EIO}; -use crate::kernel::mem::{ - CachePage, CachePageStream, Page, PageCache, PageCacheBackendOps, PageExcl, PageExt, -}; +use crate::kernel::mem::{CachePage, Page, PageExcl, PageExt, PageOffset}; use crate::kernel::timer::Instant; use crate::kernel::vfs::dentry::Dentry; -use crate::kernel::vfs::inode::{ - Ino, Inode, InodeDirOps, InodeFileOps, InodeInfo, InodeOps, InodeUse, -}; +use crate::kernel::vfs::inode::{Ino, InodeInfo, InodeOps, InodeUse}; use crate::kernel::vfs::mount::{register_filesystem, Mount, MountCreator}; use crate::kernel::vfs::types::{DeviceId, Format, Permission}; use crate::kernel::vfs::{SbRef, SbUse, SuperBlock, SuperBlockInfo}; @@ -56,6 +52,10 @@ impl Cluster { Ino::new(self.0 as _) } + pub fn from_ino(ino: Ino) -> Self { + Self(ino.as_raw() as u32) + } + fn normalized(self) -> Self { Self(self.0 - 2) } @@ -130,7 +130,7 @@ impl FatFs { } impl FatFs { - pub async fn create(device: DeviceId) -> KResult<(SbUse, InodeUse)> { + pub async fn create(device: DeviceId) -> KResult<(SbUse, InodeUse)> { let device = BlockDevice::get(device)?; let mut info = UninitBuffer::::new(); @@ -217,18 +217,15 @@ impl<'fat> Iterator for ClusterIterator<'fat> { } } -struct FileInode { - cluster: Cluster, - info: Spin, - sb: SbRef, - page_cache: PageCache, -} +struct FileInode; impl FileInode { - fn new(cluster: Cluster, sb: SbRef, size: u32) -> InodeUse { - InodeUse::new_cyclic(|weak: &Weak| Self { - cluster, - info: Spin::new(InodeInfo { + fn new(cluster: Cluster, sb: SbRef, size: u32) -> InodeUse { + InodeUse::new( + sb, + cluster.as_ino(), + Format::REG, + InodeInfo { size: size as u64, nlink: 1, uid: 0, @@ -237,108 +234,75 @@ impl FileInode { atime: Instant::UNIX_EPOCH, ctime: Instant::UNIX_EPOCH, mtime: Instant::UNIX_EPOCH, - }), - sb, - page_cache: PageCache::new(weak.clone()), - }) + }, + Self, + ) } } impl InodeOps for FileInode { type SuperBlock = FatFs; - fn ino(&self) -> Ino { - self.cluster.as_ino() - } - - fn format(&self) -> Format { - Format::REG - } - - fn info(&self) -> &Spin { - &self.info - } - - fn super_block(&self) -> &SbRef { - &self.sb - } - - fn page_cache(&self) -> Option<&PageCache> { - Some(&self.page_cache) - } -} - -impl InodeDirOps for FileInode {} -impl InodeFileOps for FileInode { - async 
fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - self.page_cache.read(buffer, offset).await + async fn read( + &self, + _: SbUse, + inode: &InodeUse, + buffer: &mut dyn Buffer, + offset: usize, + ) -> KResult { + inode.get_page_cache().read(buffer, offset).await } - async fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { - let sb = self.sb.get()?; + async fn read_page( + &self, + sb: SbUse, + inode: &InodeUse, + page: &mut CachePage, + offset: PageOffset, + ) -> KResult<()> { let fs = &sb.backend; let fat = sb.backend.fat.read().await; - if offset >= self.info.lock().size as usize { - return Ok(0); + if offset >= PageOffset::from_byte_ceil(inode.info.lock().size as usize) { + unreachable!("read_page called with offset beyond file size"); } let cluster_size = fs.sectors_per_cluster as usize * SECTOR_SIZE; - assert!(cluster_size <= 0x1000, "Cluster size is too large"); - - let skip_clusters = offset / cluster_size; - let inner_offset = offset % cluster_size; - - let cluster_iter = ClusterIterator::new(fat.as_ref(), self.cluster).skip(skip_clusters); - - let buffer_page = Page::alloc(); - for cluster in cluster_iter { - fs.read_cluster(cluster, &buffer_page).await?; - - let pg = buffer_page.lock(); - let data = &pg.as_bytes()[inner_offset..]; - - let end = offset + data.len(); - let real_end = end.min(self.info.lock().size as usize); - let real_size = real_end - offset; - - if buffer.fill(&data[..real_size])?.should_stop() { - break; - } + if cluster_size != PAGE_SIZE { + unimplemented!("cluster size != PAGE_SIZE"); } - Ok(buffer.wrote()) - } -} + // XXX: Ugly and inefficient O(n^2) algorithm for sequential file read. + let cluster = ClusterIterator::new(fat.as_ref(), Cluster::from_ino(inode.ino)) + .skip(offset.page_count()) + .next() + .ok_or(EIO)?; -impl PageCacheBackendOps for FileInode { - async fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult { - self.read_direct(page, offset).await - } + let page = page.get_page(); + fs.read_cluster(cluster, &page).await?; - async fn write_page(&self, _page: &mut CachePageStream, _offset: usize) -> KResult { - todo!() - } + let real_len = (inode.info.lock().size as usize) - offset.byte_count(); + if real_len < PAGE_SIZE { + let mut page = page.lock(); + page.as_bytes_mut()[real_len..].fill(0); + } - fn size(&self) -> usize { - self.info.lock().size as usize + Ok(()) } } struct DirInode { - cluster: Cluster, - info: Spin, - sb: SbRef, - // TODO: Use the new PageCache... dir_pages: RwLock>, } impl DirInode { - fn new(cluster: Cluster, sb: SbRef, size: u32) -> InodeUse { - InodeUse::new(Self { - cluster, - info: Spin::new(InodeInfo { + fn new(cluster: Cluster, sb: SbRef, size: u32) -> InodeUse { + InodeUse::new( + sb, + cluster.as_ino(), + Format::DIR, + InodeInfo { size: size as u64, nlink: 2, // '.' and '..' 
uid: 0, @@ -347,23 +311,23 @@ impl DirInode { atime: Instant::UNIX_EPOCH, ctime: Instant::UNIX_EPOCH, mtime: Instant::UNIX_EPOCH, - }), - sb, - dir_pages: RwLock::new(Vec::new()), - }) + }, + Self { + dir_pages: RwLock::new(Vec::new()), + }, + ) } - async fn read_dir_pages(&self) -> KResult<()> { + async fn read_dir_pages(&self, sb: &SbUse, inode: &InodeUse) -> KResult<()> { let mut dir_pages = self.dir_pages.write().await; if !dir_pages.is_empty() { return Ok(()); } - let sb = self.sb.get()?; let fs = &sb.backend; let fat = fs.fat.read().await; - let clusters = ClusterIterator::new(fat.as_ref(), self.cluster); + let clusters = ClusterIterator::new(fat.as_ref(), Cluster::from_ino(inode.ino)); for cluster in clusters { let page = PageExcl::alloc(); @@ -375,7 +339,11 @@ impl DirInode { Ok(()) } - async fn get_dir_pages(&self) -> KResult> + use<'_>> { + async fn get_dir_pages( + &self, + sb: &SbUse, + inode: &InodeUse, + ) -> KResult> + use<'_>> { { let dir_pages = self.dir_pages.read().await; if !dir_pages.is_empty() { @@ -383,7 +351,7 @@ impl DirInode { } } - self.read_dir_pages().await?; + self.read_dir_pages(sb, inode).await?; if let Some(dir_pages) = self.dir_pages.try_read() { return Ok(dir_pages); @@ -396,32 +364,13 @@ impl DirInode { impl InodeOps for DirInode { type SuperBlock = FatFs; - fn ino(&self) -> Ino { - self.cluster.as_ino() - } - - fn format(&self) -> Format { - Format::DIR - } - - fn info(&self) -> &Spin { - &self.info - } - - fn super_block(&self) -> &SbRef { - &self.sb - } - - fn page_cache(&self) -> Option<&PageCache> { - None - } -} - -impl InodeFileOps for DirInode {} -impl InodeDirOps for DirInode { - async fn lookup(&self, dentry: &Arc) -> KResult>> { - let sb = self.sb.get()?; - let dir_pages = self.get_dir_pages().await?; + async fn lookup( + &self, + sb: SbUse, + inode: &InodeUse, + dentry: &Arc, + ) -> KResult> { + let dir_pages = self.get_dir_pages(&sb, inode).await?; let dir_data = dir_pages.iter().map(|pg| pg.as_bytes()); @@ -451,48 +400,47 @@ impl InodeDirOps for DirInode { Ok(None) } - fn readdir<'r, 'a: 'r, 'b: 'r>( - &'a self, + async fn readdir( + &self, + sb: SbUse, + inode: &InodeUse, offset: usize, - callback: &'b mut (dyn FnMut(&[u8], Ino) -> KResult + Send), - ) -> impl Future>> + Send + 'r { - async move { - let sb = self.sb.get()?; - let fs = &sb.backend; - let dir_pages = self.get_dir_pages().await?; + callback: &mut (dyn FnMut(&[u8], Ino) -> KResult + Send), + ) -> KResult> { + let fs = &sb.backend; + let dir_pages = self.get_dir_pages(&sb, inode).await?; - let cluster_size = fs.sectors_per_cluster as usize * SECTOR_SIZE; + let cluster_size = fs.sectors_per_cluster as usize * SECTOR_SIZE; - let cluster_offset = offset / cluster_size; - let inner_offset = offset % cluster_size; - let inner_raw_dirent_offset = inner_offset / core::mem::size_of::(); + let cluster_offset = offset / cluster_size; + let inner_offset = offset % cluster_size; + let inner_raw_dirent_offset = inner_offset / core::mem::size_of::(); - let dir_data = dir_pages - .iter() - .skip(cluster_offset) - .map(|pg| pg.as_bytes()); - - let raw_dirents = dir_data - .map(as_raw_dirents) - .take_while_inclusive(Result::is_ok) - .flatten_ok() - .skip(inner_raw_dirent_offset); - - let mut dirents = futures::stream::iter(raw_dirents); - - let mut nread = 0; - while let Some(result) = dirents.next_dirent().await { - let entry = result?; - - match callback(&entry.filename, entry.cluster.as_ino()) { - Err(err) => return Ok(Err(err)), - Ok(true) => nread += entry.entry_offset as usize, - 
Ok(false) => break, - } - } + let dir_data = dir_pages + .iter() + .skip(cluster_offset) + .map(|pg| pg.as_bytes()); - Ok(Ok(nread)) + let raw_dirents = dir_data + .map(as_raw_dirents) + .take_while_inclusive(Result::is_ok) + .flatten_ok() + .skip(inner_raw_dirent_offset); + + let mut dirents = futures::stream::iter(raw_dirents); + + let mut nread = 0; + while let Some(result) = dirents.next_dirent().await { + let entry = result?; + + match callback(&entry.filename, entry.cluster.as_ino()) { + Err(err) => return Ok(Err(err)), + Ok(true) => nread += entry.entry_offset as usize, + Ok(false) => break, + } } + + Ok(Ok(nread)) } } diff --git a/src/fs/procfs.rs b/src/fs/procfs.rs index 57b881df..32ede420 100644 --- a/src/fs/procfs.rs +++ b/src/fs/procfs.rs @@ -1,30 +1,21 @@ +use alloc::sync::Arc; +use core::sync::atomic::{AtomicU64, Ordering}; + +use async_trait::async_trait; +use eonix_sync::{LazyLock, RwLock}; + +use crate::io::Buffer; use crate::kernel::constants::{EACCES, EISDIR, ENOTDIR}; +use crate::kernel::mem::paging::PageBuffer; use crate::kernel::timer::Instant; -use crate::kernel::vfs::inode::{InodeDirOps, InodeFileOps, InodeInfo, InodeOps, InodeUse}; +use crate::kernel::vfs::dentry::Dentry; +use crate::kernel::vfs::inode::{Ino, InodeInfo, InodeOps, InodeUse}; +use crate::kernel::vfs::mount::{dump_mounts, register_filesystem, Mount, MountCreator}; use crate::kernel::vfs::types::{DeviceId, Format, Permission}; use crate::kernel::vfs::{SbRef, SbUse, SuperBlock, SuperBlockInfo}; -use crate::{ - io::Buffer, - kernel::{ - mem::paging::PageBuffer, - vfs::{ - dentry::Dentry, - inode::{Ino, Inode}, - mount::{dump_mounts, register_filesystem, Mount, MountCreator}, - }, - }, - prelude::*, -}; -use alloc::sync::Arc; -use async_trait::async_trait; -use core::future::Future; -use core::sync::atomic::{AtomicU64, Ordering}; -use eonix_sync::{LazyLock, RwLock}; +use crate::prelude::*; struct Node { - ino: Ino, - sb: SbRef, - info: Spin, kind: NodeKind, } @@ -39,38 +30,19 @@ struct FileInode { } struct DirInode { - entries: RwLock, InodeUse)>>, + entries: RwLock, InodeUse)>>, } impl InodeOps for Node { type SuperBlock = ProcFs; - fn ino(&self) -> Ino { - self.ino - } - - fn format(&self) -> Format { - match &self.kind { - NodeKind::File(_) => Format::REG, - NodeKind::Dir(_) => Format::DIR, - } - } - - fn info(&self) -> &Spin { - &self.info - } - - fn super_block(&self) -> &SbRef { - &self.sb - } - - fn page_cache(&self) -> Option<&crate::kernel::mem::PageCache> { - None - } -} - -impl InodeFileOps for Node { - async fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult { + async fn read( + &self, + _: SbUse, + _: &InodeUse, + buffer: &mut dyn Buffer, + offset: usize, + ) -> KResult { let NodeKind::File(file_inode) = &self.kind else { return Err(EISDIR); }; @@ -88,10 +60,13 @@ impl InodeFileOps for Node { Ok(buffer.fill(data)?.allow_partial()) } -} -impl InodeDirOps for Node { - async fn lookup(&self, dentry: &Arc) -> KResult>> { + async fn lookup( + &self, + _: SbUse, + _: &InodeUse, + dentry: &Arc, + ) -> KResult> { let NodeKind::Dir(dir) = &self.kind else { return Err(ENOTDIR); }; @@ -108,29 +83,29 @@ impl InodeDirOps for Node { Ok(None) } - fn readdir<'r, 'a: 'r, 'b: 'r>( - &'a self, + async fn readdir( + &self, + _: SbUse, + _: &InodeUse, offset: usize, - callback: &'b mut (dyn FnMut(&[u8], Ino) -> KResult + Send), - ) -> impl Future>> + Send + 'r { - Box::pin(async move { - let NodeKind::Dir(dir) = &self.kind else { - return Err(ENOTDIR); - }; - - let entries = 
dir.entries.read().await;
-
-        let mut count = 0;
-        for (name, node) in entries.iter().skip(offset) {
-            match callback(name.as_ref(), node.ino) {
-                Err(err) => return Ok(Err(err)),
-                Ok(true) => count += 1,
-                Ok(false) => break,
-            }
+        callback: &mut (dyn FnMut(&[u8], Ino) -> KResult<bool> + Send),
+    ) -> KResult<Result<usize, PosixError>> {
+        let NodeKind::Dir(dir) = &self.kind else {
+            return Err(ENOTDIR);
+        };
+
+        let entries = dir.entries.read().await;
+
+        let mut count = 0;
+        for (name, node) in entries.iter().skip(offset) {
+            match callback(name.as_ref(), node.ino) {
+                Err(err) => return Ok(Err(err)),
+                Ok(true) => count += 1,
+                Ok(false) => break,
             }
+        }
 
-        Ok(Ok(count))
-        })
+        Ok(Ok(count))
     }
 }
@@ -139,11 +114,12 @@ impl Node {
         ino: Ino,
         sb: SbRef,
         read: impl Fn(&mut PageBuffer) -> KResult<()> + Send + Sync + 'static,
-    ) -> InodeUse<Self> {
-        InodeUse::new(Self {
-            ino,
+    ) -> InodeUse {
+        InodeUse::new(
             sb,
-            info: Spin::new(InodeInfo {
+            ino,
+            Format::REG,
+            InodeInfo {
                 size: 0,
                 nlink: 1,
                 uid: 0,
@@ -152,16 +128,19 @@ impl Node {
                 atime: Instant::UNIX_EPOCH,
                 ctime: Instant::UNIX_EPOCH,
                 mtime: Instant::UNIX_EPOCH,
-            }),
-            kind: NodeKind::File(FileInode::new(Box::new(read))),
-        })
+            },
+            Self {
+                kind: NodeKind::File(FileInode::new(Box::new(read))),
+            },
+        )
     }
 
-    fn new_dir(ino: Ino, sb: SbRef) -> InodeUse<Self> {
-        InodeUse::new(Self {
-            ino,
+    fn new_dir(ino: Ino, sb: SbRef) -> InodeUse {
+        InodeUse::new(
             sb,
-            info: Spin::new(InodeInfo {
+            ino,
+            Format::DIR,
+            InodeInfo {
                 size: 0,
                 nlink: 1,
                 uid: 0,
@@ -170,9 +149,11 @@ impl Node {
                 atime: Instant::UNIX_EPOCH,
                 ctime: Instant::UNIX_EPOCH,
                 mtime: Instant::UNIX_EPOCH,
-            }),
-            kind: NodeKind::Dir(DirInode::new()),
-        })
+            },
+            Self {
+                kind: NodeKind::Dir(DirInode::new()),
+            },
+        )
     }
 }
 
@@ -194,7 +175,7 @@ impl DirInode {
 }
 
 pub struct ProcFs {
-    root: InodeUse<Node>,
+    root: InodeUse,
     next_ino: AtomicU64,
 }
 
@@ -240,7 +221,7 @@ where
    F: Send + Sync + Fn(&mut PageBuffer) -> KResult<()> + 'static,
 {
     let procfs = &GLOBAL_PROCFS.backend;
-    let root = &procfs.root;
+    let root = &procfs.root.get_priv::<Node>();
 
     let NodeKind::Dir(root) = &root.kind else {
         unreachable!();
 
diff --git a/src/fs/tmpfs/dir.rs b/src/fs/tmpfs/dir.rs
index e2be1d12..4dd64d52 100644
--- a/src/fs/tmpfs/dir.rs
+++ b/src/fs/tmpfs/dir.rs
@@ -1,72 +1,51 @@
-use core::{any::Any, future::Future};
+use alloc::sync::Arc;
+use alloc::vec;
+use alloc::vec::Vec;
 
-use alloc::{boxed::Box, sync::Arc, vec, vec::Vec};
 use eonix_log::println_warn;
-use eonix_sync::{LazyLock, RwLock, Spin};
-
-use crate::{
-    kernel::{
-        constants::{EEXIST, EINVAL, EISDIR, ENOENT, ENOSYS, ENOTDIR},
-        mem::PageCache,
-        timer::Instant,
-        vfs::{
-            dentry::{dcache, Dentry},
-            inode::{
-                Ino, Inode, InodeDirOps, InodeFileOps, InodeInfo, InodeOps, InodeUse, RenameData,
-            },
-            types::{DeviceId, Format, Mode, Permission},
-            SbRef,
-        },
-    },
-    prelude::KResult,
-};
-
-use super::{
-    file::{DeviceInode, FileInode, SymlinkInode},
-    TmpFs,
-};
+use eonix_sync::{LazyLock, RwLock};
+
+use super::file::{DeviceInode, FileInode, SymlinkInode};
+use super::TmpFs;
+use crate::kernel::constants::{EEXIST, EINVAL, EISDIR, ENOENT, ENOSYS, ENOTDIR};
+use crate::kernel::timer::Instant;
+use crate::kernel::vfs::dentry::{dcache, Dentry};
+use crate::kernel::vfs::inode::{Ino, InodeInfo, InodeOps, InodeUse, RenameData};
+use crate::kernel::vfs::types::{DeviceId, Format, Mode, Permission};
+use crate::kernel::vfs::{SbRef, SbUse};
+use crate::prelude::KResult;
 
 pub struct DirectoryInode {
-    sb: SbRef,
-    ino: Ino,
-    info: Spin<InodeInfo>,
     entries: RwLock<Vec<(Arc<[u8]>, Ino)>>,
 }
 
-impl InodeOps for DirectoryInode {
-    type SuperBlock = TmpFs;
-
-    fn ino(&self) -> Ino {
-        self.ino
-    }
+fn link(dir: &InodeUse, entries: &mut Vec<(Arc<[u8]>, Ino)>, name: Arc<[u8]>, file: &InodeUse) {
+    let mut dir_info = dir.info.lock();
+    let mut file_info = file.info.lock();
 
-    fn format(&self) -> Format {
-        Format::DIR
-    }
+    let now = Instant::now();
 
-    fn info(&self) -> &Spin<InodeInfo> {
-        &self.info
-    }
+    file_info.nlink += 1;
+    file_info.ctime = now;
 
-    fn super_block(&self) -> &SbRef {
-        &self.sb
-    }
+    dir_info.size += 1;
+    dir_info.mtime = now;
+    dir_info.ctime = now;
 
-    fn page_cache(&self) -> Option<&PageCache> {
-        None
-    }
+    entries.push((name, file.ino));
 }
 
 impl DirectoryInode {
-    pub fn new(ino: Ino, sb: SbRef, perm: Permission) -> InodeUse<Self> {
+    pub fn new(ino: Ino, sb: SbRef, perm: Permission) -> InodeUse {
        static DOT: LazyLock<Arc<[u8]>> = LazyLock::new(|| Arc::from(b".".as_slice()));
 
         let now = Instant::now();
 
-        InodeUse::new(Self {
+        InodeUse::new(
             sb,
             ino,
-            info: Spin::new(InodeInfo {
+            Format::DIR,
+            InodeInfo {
                 size: 1,
                 nlink: 1, // link from `.` to itself
                 perm,
@@ -75,35 +54,16 @@ impl DirectoryInode {
                 atime: now,
                 uid: 0,
                 gid: 0,
-            }),
-            entries: RwLock::new(vec![(DOT.clone(), ino)]),
-        })
-    }
-
-    fn link(
-        &self,
-        entries: &mut Vec<(Arc<[u8]>, Ino)>,
-        name: Arc<[u8]>,
-        file: &InodeUse<dyn Inode>,
-    ) {
-        let mut self_info = self.info.lock();
-        let mut file_info = file.info().lock();
-
-        let now = Instant::now();
-
-        file_info.nlink += 1;
-        file_info.ctime = now;
-
-        self_info.size += 1;
-        self_info.mtime = now;
-        self_info.ctime = now;
-
-        entries.push((name, file.ino()));
+            },
+            Self {
+                entries: RwLock::new(vec![(DOT.clone(), ino)]),
+            },
+        )
     }
 
     fn do_unlink(
         &self,
-        file: &InodeUse<dyn Inode>,
+        file: &InodeUse,
         filename: &[u8],
         entries: &mut Vec<(Arc<[u8]>, Ino)>,
         now: Instant,
@@ -112,11 +72,11 @@ impl DirectoryInode {
         file_info: &mut InodeInfo,
     ) -> KResult<()> {
         // SAFETY: `file_lock` has done the synchronization
-        if file.format() == Format::DIR {
+        if file.format == Format::DIR {
             return Err(EISDIR);
         }
 
-        let file_ino = file.ino();
+        let file_ino = file.ino;
         entries.retain(|(name, ino)| *ino != file_ino || name.as_ref() != filename);
 
         if decrease_size {
@@ -138,87 +98,114 @@ impl DirectoryInode {
     }
 }
 
-impl InodeDirOps for DirectoryInode {
-    fn readdir<'r, 'a: 'r, 'b: 'r>(
-        &'a self,
+impl InodeOps for DirectoryInode {
+    type SuperBlock = TmpFs;
+
+    async fn readdir(
+        &self,
+        sb: SbUse,
+        _: &InodeUse,
         offset: usize,
-        for_each_entry: &'b mut (dyn FnMut(&[u8], Ino) -> KResult<bool> + Send),
-    ) -> impl Future<Output = KResult<Result<usize, PosixError>>> + Send + 'r {
-        Box::pin(async move {
-            let _sb = self.sb.get()?;
-            let entries = self.entries.read().await;
-
-            let mut count = 0;
-            for entry in entries.iter().skip(offset) {
-                match for_each_entry(&entry.0, entry.1) {
-                    Err(err) => return Ok(Err(err)),
-                    Ok(false) => break,
-                    Ok(true) => count += 1,
-                }
+        for_each_entry: &mut (dyn FnMut(&[u8], Ino) -> KResult<bool> + Send),
+    ) -> KResult<Result<usize, PosixError>> {
+        let _sb = sb;
+        let entries = self.entries.read().await;
+
+        let mut count = 0;
+        for entry in entries.iter().skip(offset) {
+            match for_each_entry(&entry.0, entry.1) {
+                Err(err) => return Ok(Err(err)),
+                Ok(false) => break,
+                Ok(true) => count += 1,
             }
+        }
 
-            Ok(Ok(count))
-        })
+        Ok(Ok(count))
    }
 
-    async fn create(&self, at: &Arc<Dentry>, perm: Permission) -> KResult<()> {
-        let sb = self.sb.get()?;
+    async fn create(
+        &self,
+        sb: SbUse,
+        inode: &InodeUse,
+        at: &Arc<Dentry>,
+        perm: Permission,
+    ) -> KResult<()> {
         let mut entries = self.entries.write().await;
         let ino = sb.backend.assign_ino();
 
-        let file: InodeUse<dyn Inode> = FileInode::new(ino, self.sb.clone(), 0, perm);
+        let file = FileInode::new(ino, sb.get_ref(), 0, perm);
 
-        self.link(&mut entries, at.get_name(), &file);
+        link(inode, &mut entries, at.get_name(), &file);
         at.fill(file);
 
         Ok(())
     }
 
-    async fn mknod(&self, at: &Dentry, mode: Mode, dev: DeviceId) -> KResult<()> {
+    async fn mknod(
+        &self,
+        sb: SbUse,
+        inode: &InodeUse,
+        at: &Dentry,
+        mode: Mode,
+        dev: DeviceId,
+    ) -> KResult<()> {
         if !mode.is_chr() && !mode.is_blk() {
             return Err(EINVAL);
         }
 
-        let sb = self.sb.get()?;
         let mut entries = self.entries.write().await;
         let ino = sb.backend.assign_ino();
 
-        let file: InodeUse<dyn Inode> = DeviceInode::new(ino, self.sb.clone(), mode, dev);
+        let file = DeviceInode::new(ino, sb.get_ref(), mode, dev);
 
-        self.link(&mut entries, at.get_name(), &file);
+        link(inode, &mut entries, at.get_name(), &file);
         at.fill(file);
 
         Ok(())
     }
 
-    async fn symlink(&self, at: &Arc<Dentry>, target: &[u8]) -> KResult<()> {
-        let sb = self.sb.get()?;
+    async fn symlink(
+        &self,
+        sb: SbUse,
+        inode: &InodeUse,
+        at: &Arc<Dentry>,
+        target: &[u8],
+    ) -> KResult<()> {
         let mut entries = self.entries.write().await;
         let ino = sb.backend.assign_ino();
 
-        let file: InodeUse<dyn Inode> = SymlinkInode::new(ino, self.sb.clone(), target.into());
+        let file = SymlinkInode::new(ino, sb.get_ref(), target.into());
 
-        self.link(&mut entries, at.get_name(), &file);
+        link(inode, &mut entries, at.get_name(), &file);
         at.fill(file);
 
         Ok(())
     }
 
-    async fn mkdir(&self, at: &Dentry, perm: Permission) -> KResult<()> {
-        let sb = self.sb.get()?;
+    async fn mkdir(
+        &self,
+        sb: SbUse,
+        inode: &InodeUse,
+        at: &Dentry,
+        perm: Permission,
+    ) -> KResult<()> {
         let mut entries = self.entries.write().await;
         let ino = sb.backend.assign_ino();
 
-        let new_dir: InodeUse<dyn Inode> = DirectoryInode::new(ino, self.sb.clone(), perm);
+        let new_dir = DirectoryInode::new(ino, sb.get_ref(), perm);
 
-        self.link(&mut entries, at.get_name(), &new_dir);
+        link(inode, &mut entries, at.get_name(), &new_dir);
         at.fill(new_dir);
 
         Ok(())
     }
 
-    async fn unlink(&self, at: &Arc<Dentry>) -> KResult<()> {
-        let _sb = self.sb.get()?;
+    async fn unlink(
+        &self,
+        _sb: SbUse,
+        inode: &InodeUse,
+        at: &Arc<Dentry>,
+    ) -> KResult<()> {
         let mut entries = self.entries.write().await;
 
         let file = at.get_inode()?;
@@ -230,8 +217,8 @@ impl InodeDirOps for DirectoryInode {
             &mut entries,
             Instant::now(),
             true,
-            &mut self.info.lock(),
-            &mut file.info().lock(),
+            &mut inode.info.lock(),
+            &mut file.info.lock(),
         )?;
 
         // Remove the dentry from the dentry cache immediately
@@ -241,8 +228,12 @@ impl InodeDirOps for DirectoryInode {
         Ok(())
     }
 
-    async fn rename(&self, rename_data: RenameData<'_, '_>) -> KResult<()> {
-        let sb = self.sb.get()?;
+    async fn rename(
+        &self,
+        sb: SbUse,
+        inode: &InodeUse,
+        rename_data: RenameData<'_, '_>,
+    ) -> KResult<()> {
         let _rename_lock = sb.backend.rename_lock.lock().await;
         let mut self_entries = self.entries.write().await;
 
@@ -266,11 +257,11 @@ impl InodeDirOps for DirectoryInode {
             return Err(EEXIST);
         }
 
-        if new_parent.as_raw() == &raw const *self {
+        if inode == &new_parent {
             // Same directory rename
             // Remove from old location and add to new location
-            let old_ino = old_file.ino();
-            let new_ino = new_file.as_ref().map(|f| f.ino());
+            let old_ino = old_file.ino;
+            let new_ino = new_file.as_ref().map(|f| f.ino);
             let old_name = old_dentry.get_name();
             let new_name = new_dentry.get_name();
 
@@ -299,7 +290,7 @@ impl InodeDirOps for DirectoryInode {
                 // Replace existing file (i.e. rename the old and unlink the new)
                 let new_file = new_file.unwrap();
 
-                match (new_file.format(), old_file.format()) {
+                match (new_file.format, old_file.format) {
                     (Format::DIR, _) => return Err(EISDIR),
                     (_, Format::DIR) => return Err(ENOTDIR),
                     _ => {}
@@ -307,12 +298,12 @@ impl InodeDirOps for DirectoryInode {
 
                 self_entries.remove(new_idx);
 
-                self.info.lock().size -= 1;
+                inode.info.lock().size -= 1;
 
                 // The last reference to the inode is held by some dentry
                 // and will be released when the dentry is released
-                let mut new_info = new_file.info().lock();
+                let mut new_info = new_file.info.lock();
                 new_info.nlink -= 1;
                 new_info.mtime = now;
 
@@ -322,24 +313,21 @@ impl InodeDirOps for DirectoryInode {
             let (name, _) = &mut self_entries[old_ent_idx];
             *name = new_dentry.get_name();
 
-            let mut self_info = self.info.lock();
+            let mut self_info = inode.info.lock();
             self_info.mtime = now;
             self_info.ctime = now;
         } else {
             // Cross-directory rename - handle similar to same directory case
             // Get new parent directory
-            let new_parent_inode = new_dentry.parent().get_inode()?;
-            assert_eq!(new_parent_inode.format(), Format::DIR);
-
-            let new_parent = (&new_parent_inode as &dyn Any)
-                .downcast_ref::<DirectoryInode>()
-                .expect("new parent must be a DirectoryInode");
+            let new_parent = new_dentry.parent().get_inode()?;
+            assert_eq!(new_parent.format, Format::DIR);
 
-            let mut new_entries = new_parent.entries.write().await;
+            let new_parent_priv = new_parent.get_priv::<DirectoryInode>();
+            let mut new_entries = new_parent_priv.entries.write().await;
 
-            let old_ino = old_file.ino();
-            let new_ino = new_file.as_ref().map(|f| f.ino());
+            let old_ino = old_file.ino;
+            let new_ino = new_file.as_ref().map(|f| f.ino);
             let old_name = old_dentry.get_name();
             let new_name = new_dentry.get_name();
 
@@ -361,26 +349,28 @@ impl InodeDirOps for DirectoryInode {
                 // Replace existing file (i.e. move the old and unlink the new)
                 let new_file = new_file.unwrap();
 
-                match (old_file.format(), new_file.format()) {
+                match (old_file.format, new_file.format) {
                     (Format::DIR, Format::DIR) => {}
                     (Format::DIR, _) => return Err(ENOTDIR),
                     (_, _) => {}
                 }
 
                 // Unlink the old file that was replaced
-                new_parent.do_unlink(
+                new_parent_priv.do_unlink(
                     &new_file,
                     &new_name,
                     &mut new_entries,
                     now,
                     false,
                     &mut new_parent.info.lock(),
-                    &mut new_file.info().lock(),
+                    &mut new_file.info.lock(),
                 )?;
             } else {
-                new_parent.info.lock().size += 1;
-                new_parent.info.lock().mtime = now;
-                new_parent.info.lock().ctime = now;
+                let mut info = new_parent.info.lock();
+
+                info.size += 1;
+                info.mtime = now;
+                info.ctime = now;
             }
 
             // Remove from old directory
@@ -389,7 +379,7 @@ impl InodeDirOps for DirectoryInode {
             // Add new entry
             new_entries.push((new_name, old_ino));
 
-            let mut self_info = self.info.lock();
+            let mut self_info = inode.info.lock();
             self_info.size -= 1;
             self_info.mtime = now;
             self_info.ctime = now;
@@ -398,17 +388,16 @@ impl InodeDirOps for DirectoryInode {
         dcache::d_exchange(old_dentry, new_dentry).await;
         Ok(())
     }
-}
-
-impl InodeFileOps for DirectoryInode {
-    async fn chmod(&self, perm: Permission) -> KResult<()> {
-        let _sb = self.sb.get()?;
-        {
-            let mut info = self.info.lock();
-            info.perm = perm;
-            info.ctime = Instant::now();
-        }
 
+    async fn chmod(
+        &self,
+        _sb: SbUse,
+        inode: &InodeUse,
+        perm: Permission,
+    ) -> KResult<()> {
+        let mut info = inode.info.lock();
+        info.perm = perm;
+        info.ctime = Instant::now();
         Ok(())
     }
 
diff --git a/src/fs/tmpfs/file.rs b/src/fs/tmpfs/file.rs
index 624112e0..a1755908 100644
--- a/src/fs/tmpfs/file.rs
+++ b/src/fs/tmpfs/file.rs
@@ -1,39 +1,26 @@
+use alloc::collections::btree_map::BTreeMap;
 use alloc::sync::Arc;
 
-use eonix_mm::paging::PAGE_SIZE;
-use eonix_sync::{RwLock, Spin};
-
-use crate::{
-    io::{Buffer, Stream},
-    kernel::{
-        mem::{CachePage, CachePageStream, PageCache, PageCacheBackendOps},
-        timer::Instant,
-        vfs::{
-            inode::{Ino, InodeDirOps, InodeFileOps, InodeInfo, InodeOps, InodeUse, WriteOffset},
-            types::{DeviceId, Format, Mode, Permission},
-            SbRef,
-        },
-    },
-    prelude::KResult,
-};
-
 use super::TmpFs;
+use crate::io::{Buffer, Stream};
+use crate::kernel::mem::{CachePage, PageCache, PageOffset};
+use crate::kernel::timer::Instant;
+use crate::kernel::vfs::inode::{Ino, InodeInfo, InodeOps, InodeUse, WriteOffset};
+use crate::kernel::vfs::types::{DeviceId, Format, Mode, Permission};
+use crate::kernel::vfs::{SbRef, SbUse};
+use crate::prelude::KResult;
 
-pub struct FileInode {
-    sb: SbRef,
-    ino: Ino,
-    info: Spin<InodeInfo>,
-    rwsem: RwLock<()>,
-    pages: PageCache,
-}
+pub struct FileInode;
 
 impl FileInode {
-    pub fn new(ino: Ino, sb: SbRef, size: usize, perm: Permission) -> InodeUse<Self> {
+    pub fn new(ino: Ino, sb: SbRef, size: usize, perm: Permission) -> InodeUse {
         let now = Instant::now();
 
-        InodeUse::new_cyclic(|weak| Self {
+        InodeUse::new(
             sb,
             ino,
-            info: Spin::new(InodeInfo {
+            Format::REG,
+            InodeInfo {
                 size: size as _,
                 nlink: 1,
                 uid: 0,
@@ -42,60 +29,34 @@ impl FileInode {
                 atime: now,
                 ctime: now,
                 mtime: now,
-            }),
-            rwsem: RwLock::new(()),
-            pages: PageCache::new(weak.clone() as _),
-        })
-    }
-}
-
-impl PageCacheBackendOps for FileInode {
-    async fn read_page(&self, _cache_page: &mut CachePage, _offset: usize) -> KResult<usize> {
-        Ok(PAGE_SIZE)
-    }
-
-    async fn write_page(&self, _page: &mut CachePageStream, _offset: usize) -> KResult<usize> {
-        Ok(PAGE_SIZE)
-    }
-
-    fn size(&self) -> usize {
-        self.info.lock().size as usize
+            },
+            Self,
+        )
     }
 }
 
 impl InodeOps for FileInode {
     type SuperBlock = TmpFs;
 
-    fn ino(&self) -> Ino {
-        self.ino
-    }
-
-    fn format(&self) -> Format {
-        Format::REG
-    }
-
-    fn info(&self) -> &Spin<InodeInfo> {
-        &self.info
-    }
-
-    fn super_block(&self) -> &SbRef {
-        &self.sb
-    }
-
-    fn page_cache(&self) -> Option<&PageCache> {
-        Some(&self.pages)
-    }
-}
-
-impl InodeDirOps for FileInode {}
-impl InodeFileOps for FileInode {
-    async fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
-        let _lock = self.rwsem.read().await;
-        self.pages.read(buffer, offset).await
+    async fn read(
+        &self,
+        _: SbUse,
+        inode: &InodeUse,
+        buffer: &mut dyn Buffer,
+        offset: usize,
+    ) -> KResult<usize> {
+        let _lock = inode.rwsem.read().await;
+        inode.get_page_cache().read(buffer, offset).await
     }
 
-    async fn write(&self, stream: &mut dyn Stream, offset: WriteOffset<'_>) -> KResult<usize> {
-        let _lock = self.rwsem.write().await;
+    async fn write(
+        &self,
+        _: SbUse,
+        inode: &InodeUse,
+        stream: &mut dyn Stream,
+        offset: WriteOffset<'_>,
+    ) -> KResult<usize> {
+        let _lock = inode.rwsem.write().await;
 
         let mut store_new_end = None;
         let offset = match offset {
@@ -104,74 +65,131 @@ impl InodeFileOps for FileInode {
                 store_new_end = Some(end);
 
                 // `info.size` won't change since we are holding the write lock.
-                self.info.lock().size as usize
+                inode.info.lock().size as usize
             }
         };
 
-        let wrote = self.pages.write(stream, offset).await?;
+        let page_cache = inode.get_page_cache();
+
+        if Arc::strong_count(&page_cache) == 1 {
+            // XXX: A temporary workaround here. Change this ASAP...
+            // Prevent the page cache from being dropped during the write.
+            let _ = Arc::into_raw(page_cache.clone());
+        }
+
+        let wrote = page_cache.write(stream, offset).await?;
         let cursor_end = offset + wrote;
 
         if let Some(store_end) = store_new_end {
             *store_end = cursor_end;
         }
 
-        {
-            let now = Instant::now();
-            let mut info = self.info.lock();
-            info.mtime = now;
-            info.ctime = now;
-            info.size = info.size.max(cursor_end as u64);
-        }
-
         Ok(wrote)
     }
 
-    async fn truncate(&self, length: usize) -> KResult<()> {
-        let _lock = self.rwsem.write().await;
+    async fn truncate(
+        &self,
+        _: SbUse,
+        inode: &InodeUse,
+        length: usize,
+    ) -> KResult<()> {
+        let _lock = inode.rwsem.write().await;
 
-        self.pages.resize(length).await?;
+        let now = Instant::now();
+        let mut info = inode.info.lock();
+        info.mtime = now;
+        info.ctime = now;
+        info.size = length as u64;
 
-        {
-            let now = Instant::now();
-            let mut info = self.info.lock();
-            info.mtime = now;
-            info.ctime = now;
-            info.size = length as u64;
-        }
+        Ok(())
+    }
+
+    async fn chmod(
+        &self,
+        _sb: SbUse,
+        inode: &InodeUse,
+        perm: Permission,
+    ) -> KResult<()> {
+        let mut info = inode.info.lock();
+
+        info.perm = perm;
+        info.ctime = Instant::now();
+
+        Ok(())
+    }
 
+    async fn read_page(
+        &self,
+        _: SbUse,
+        _: &InodeUse,
+        page: &mut CachePage,
+        _: PageOffset,
+    ) -> KResult<()> {
+        page.as_bytes_mut().fill(0);
         Ok(())
     }
 
-    async fn chmod(&self, perm: Permission) -> KResult<()> {
-        let _sb = self.sb.get()?;
+    async fn write_page(
+        &self,
+        _: SbUse,
+        _: &InodeUse,
+        _: &mut CachePage,
+        _: PageOffset,
+    ) -> KResult<()> {
+        // XXX: Actually, we should refuse to do the writeback.
+        // Think of a way to inform the page cache of that.
+        Ok(())
+    }
 
-        {
-            let mut info = self.info.lock();
+    async fn write_begin<'a>(
+        &self,
+        _: SbUse,
+        _: &InodeUse,
+        page_cache: &PageCache,
+        pages: &'a mut BTreeMap<PageOffset, CachePage>,
+        offset: usize,
+        _: usize,
+    ) -> KResult<&'a mut CachePage> {
+        // TODO: Remove dependency on `page_cache`.
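+        // Assumed contract (inferred from `PageCache::write` below):
+        // `write_begin` hands back a locked cache page for the caller to
+        // copy user data into; the paired `write_end` then publishes the
+        // new size and timestamps once the copy is done.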
+        page_cache
+            .get_page_locked(pages, PageOffset::from_byte_floor(offset))
+            .await
+    }
 
-            info.perm = perm;
-            info.ctime = Instant::now();
-        }
+    async fn write_end(
+        &self,
+        _: SbUse,
+        inode: &InodeUse,
+        _: &PageCache,
+        _: &mut BTreeMap<PageOffset, CachePage>,
+        offset: usize,
+        _: usize,
+        copied: usize,
+    ) -> KResult<()> {
+        let now = Instant::now();
+        let mut info = inode.info.lock();
+        info.mtime = now;
+        info.ctime = now;
+        info.size = info.size.max((offset + copied) as u64);
 
         Ok(())
     }
 }
 
 pub struct DeviceInode {
-    sb: SbRef,
-    ino: Ino,
-    info: Spin<InodeInfo>,
     is_block: bool,
     devid: DeviceId,
 }
 
 impl DeviceInode {
-    pub fn new(ino: Ino, sb: SbRef, mode: Mode, devid: DeviceId) -> InodeUse<Self> {
+    pub fn new(ino: Ino, sb: SbRef, mode: Mode, devid: DeviceId) -> InodeUse {
         let now = Instant::now();
 
-        InodeUse::new(Self {
+        InodeUse::new(
             sb,
             ino,
+            mode.format(),
+            InodeInfo {
-            info: Spin::new(InodeInfo {
                 size: 0,
                 nlink: 1,
                 uid: 0,
@@ -180,76 +198,49 @@ impl DeviceInode {
                 atime: now,
                 ctime: now,
                 mtime: now,
-            }),
-            is_block: mode.format() == Format::BLK,
-            devid,
-        })
+            },
+            Self {
+                is_block: mode.format() == Format::BLK,
+                devid,
+            },
+        )
     }
 }
 
 impl InodeOps for DeviceInode {
     type SuperBlock = TmpFs;
 
-    fn ino(&self) -> Ino {
-        self.ino
-    }
-
-    fn format(&self) -> Format {
-        if self.is_block {
-            Format::BLK
-        } else {
-            Format::CHR
-        }
-    }
-
-    fn info(&self) -> &Spin<InodeInfo> {
-        &self.info
-    }
-
-    fn super_block(&self) -> &SbRef {
-        &self.sb
-    }
-
-    fn page_cache(&self) -> Option<&PageCache> {
-        None
-    }
-}
-
-impl InodeDirOps for DeviceInode {}
-impl InodeFileOps for DeviceInode {
-    async fn chmod(&self, perm: Permission) -> KResult<()> {
-        let _sb = self.sb.get()?;
-
-        {
-            let mut info = self.info.lock();
-
-            info.perm = perm;
-            info.ctime = Instant::now();
-        }
+    async fn chmod(
+        &self,
+        _sb: SbUse,
+        inode: &InodeUse,
+        perm: Permission,
+    ) -> KResult<()> {
+        let mut info = inode.info.lock();
+        info.perm = perm;
+        info.ctime = Instant::now();
 
         Ok(())
     }
 
-    fn devid(&self) -> KResult<DeviceId> {
+    fn devid(&self, _: SbUse, _: &InodeUse) -> KResult<DeviceId> {
         Ok(self.devid)
     }
 }
 
 pub struct SymlinkInode {
-    sb: SbRef,
-    ino: Ino,
-    info: Spin<InodeInfo>,
     target: Arc<[u8]>,
 }
 
 impl SymlinkInode {
-    pub fn new(ino: Ino, sb: SbRef, target: Arc<[u8]>) -> InodeUse<Self> {
+    pub fn new(ino: Ino, sb: SbRef, target: Arc<[u8]>) -> InodeUse {
         let now = Instant::now();
 
-        InodeUse::new(Self {
+        InodeUse::new(
             sb,
             ino,
-            info: Spin::new(InodeInfo {
+            Format::LNK,
+            InodeInfo {
                 size: target.len() as _,
                 nlink: 1,
                 uid: 0,
@@ -258,39 +249,21 @@ impl SymlinkInode {
                 atime: now,
                 ctime: now,
                 mtime: now,
-            }),
-            target,
-        })
+            },
+            Self { target },
+        )
     }
 }
 
-impl InodeDirOps for SymlinkInode {}
 impl InodeOps for SymlinkInode {
     type SuperBlock = TmpFs;
 
-    fn ino(&self) -> Ino {
-        self.ino
-    }
-
-    fn format(&self) -> Format {
-        Format::LNK
-    }
-
-    fn info(&self) -> &Spin<InodeInfo> {
-        &self.info
-    }
-
-    fn super_block(&self) -> &SbRef {
-        &self.sb
-    }
-
-    fn page_cache(&self) -> Option<&PageCache> {
-        None
-    }
-}
-
-impl InodeFileOps for SymlinkInode {
-    async fn readlink(&self, buffer: &mut dyn Buffer) -> KResult<usize> {
+    async fn readlink(
+        &self,
+        _sb: SbUse,
+        _inode: &InodeUse,
+        buffer: &mut dyn Buffer,
+    ) -> KResult<usize> {
         buffer
             .fill(self.target.as_ref())
             .map(|result| result.allow_partial())
 
diff --git a/src/fs/tmpfs/mod.rs b/src/fs/tmpfs/mod.rs
index 2bef67b6..62a0dfc2 100644
--- a/src/fs/tmpfs/mod.rs
+++ b/src/fs/tmpfs/mod.rs
@@ -1,23 +1,20 @@
 mod dir;
 mod file;
 
-use crate::kernel::vfs::inode::{Ino, InodeUse};
-use crate::kernel::vfs::types::{DeviceId, Permission};
-use crate::kernel::vfs::{SbRef, SbUse, SuperBlock, SuperBlockInfo};
-use crate::{
-    kernel::vfs::{
-        dentry::Dentry,
-        mount::{register_filesystem, Mount, MountCreator},
-    },
-    prelude::*,
-};
 use alloc::sync::Arc;
+use core::sync::atomic::{AtomicU64, Ordering};
+
 use async_trait::async_trait;
-use core::sync::atomic::AtomicU64;
-use core::sync::atomic::Ordering;
 use dir::DirectoryInode;
 use eonix_sync::Mutex;
 
+use crate::kernel::vfs::dentry::Dentry;
+use crate::kernel::vfs::inode::{Ino, InodeUse};
+use crate::kernel::vfs::mount::{register_filesystem, Mount, MountCreator};
+use crate::kernel::vfs::types::{DeviceId, Permission};
+use crate::kernel::vfs::{SbRef, SbUse, SuperBlock, SuperBlockInfo};
+use crate::prelude::*;
+
 pub struct TmpFs {
     next_ino: AtomicU64,
     rename_lock: Mutex<()>,
@@ -30,7 +27,7 @@ impl TmpFs {
         Ino::new(self.next_ino.fetch_add(1, Ordering::Relaxed))
     }
 
-    fn create() -> KResult<(SbUse, InodeUse)> {
+    fn create() -> KResult<(SbUse, InodeUse)> {
         let tmpfs = SbUse::new(
             SuperBlockInfo {
                 io_blksize: 4096,
 
diff --git a/src/kernel/mem.rs b/src/kernel/mem.rs
index bfc826bf..f8b5dc0b 100644
--- a/src/kernel/mem.rs
+++ b/src/kernel/mem.rs
@@ -12,5 +12,5 @@ pub use access::PhysAccess;
 pub(self) use mm_area::MMArea;
 pub use mm_list::{handle_kernel_page_fault, FileMapping, MMList, Mapping, Permission};
 pub use page_alloc::{GlobalPageAlloc, RawPage};
-pub use page_cache::{CachePage, CachePageStream, PageCache, PageCacheBackendOps};
+pub use page_cache::{CachePage, PageCache, PageOffset};
 pub use paging::{Page, PageBuffer, PageExcl, PageExt};
 
diff --git a/src/kernel/mem/mm_area.rs b/src/kernel/mem/mm_area.rs
index dcbeeb63..2891dad8 100644
--- a/src/kernel/mem/mm_area.rs
+++ b/src/kernel/mem/mm_area.rs
@@ -4,12 +4,12 @@ use core::cmp;
 
 use eonix_mm::address::{AddrOps as _, VAddr, VRange};
 use eonix_mm::page_table::{PageAttribute, RawAttribute, PTE};
-use eonix_mm::paging::{PAGE_SIZE, PFN};
+use eonix_mm::paging::PFN;
 
 use super::mm_list::EMPTY_PAGE;
 use super::{Mapping, Page, Permission};
-use crate::kernel::constants::EINVAL;
-use crate::kernel::mem::{PageExcl, PageExt};
+use crate::kernel::mem::page_cache::PageOffset;
+use crate::kernel::mem::{CachePage, PageExcl, PageExt};
 use crate::prelude::KResult;
 
 #[derive(Debug)]
@@ -141,59 +141,48 @@ impl MMArea {
 
         assert!(offset < file_mapping.length, "Offset out of range");
 
-        let Some(page_cache) = file_mapping.file.page_cache() else {
-            panic!("Mapping file should have pagecache");
+        let file_offset = file_mapping.offset + offset;
+
+        let map_page = |page: &Page, cache_page: &CachePage| {
+            if !self.permission.write {
+                assert!(!write, "Write fault on read-only mapping");
+
+                *pfn = page.clone().into_raw();
+                return;
+            }
+
+            if self.is_shared {
+                // We don't process dirty flags in write faults.
+                // Simply assume that page will eventually be dirtied.
+                // So here we can set the dirty flag now.
+                cache_page.set_dirty(true);
+                attr.insert(PageAttribute::WRITE);
+                *pfn = page.clone().into_raw();
+                return;
+            }
+
+            if !write {
+                // Delay the copy-on-write until write fault happens.
+                attr.insert(PageAttribute::COPY_ON_WRITE);
+                *pfn = page.clone().into_raw();
+                return;
+            }
+
+            // XXX: Change this. Let's handle mapped pages before CoW pages.
+            // For now, we are writing to a mapped private mapping, so copy the page.
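+            // Break copy-on-write: allocate a fresh exclusive page, copy the
+            // cached contents into it, and map the copy writable.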
+            let mut new_page = PageExcl::zeroed();
+            new_page
+                .as_bytes_mut()
+                .copy_from_slice(page.lock().as_bytes());
+
+            attr.insert(PageAttribute::WRITE);
+            *pfn = new_page.into_page().into_raw();
         };
 
-        let file_offset = file_mapping.offset + offset;
-        let cnt_to_read = (file_mapping.length - offset).min(0x1000);
-
-        page_cache
-            .with_page(file_offset, |page, cache_page| {
-                // Non-write faults: we find page in pagecache and do mapping
-                // Write fault: we need to care about shared or private mapping.
-                if !write {
-                    // Bss is embarrassing in pagecache!
-                    // We have to assume cnt_to_read < PAGE_SIZE all bss
-                    if cnt_to_read < PAGE_SIZE {
-                        let mut new_page = PageExcl::zeroed();
-
-                        new_page.as_bytes_mut()[..cnt_to_read]
-                            .copy_from_slice(&page.lock().as_bytes()[..cnt_to_read]);
-
-                        *pfn = new_page.into_page().into_raw();
-                    } else {
-                        *pfn = page.clone().into_raw();
-                    }
-
-                    if self.permission.write {
-                        if self.is_shared {
-                            // The page may will not be written,
-                            // But we simply assume page will be dirty
-                            cache_page.set_dirty();
-                            attr.insert(PageAttribute::WRITE);
-                        } else {
-                            attr.insert(PageAttribute::COPY_ON_WRITE);
-                        }
-                    }
-                } else {
-                    if self.is_shared {
-                        cache_page.set_dirty();
-                        *pfn = page.clone().into_raw();
-                    } else {
-                        let mut new_page = PageExcl::zeroed();
-
-                        new_page.as_bytes_mut()[..cnt_to_read]
-                            .copy_from_slice(&page.lock().as_bytes()[..cnt_to_read]);
-
-                        *pfn = new_page.into_page().into_raw();
-                    }
-
-                    attr.insert(PageAttribute::WRITE);
-                }
-            })
-            .await?
-            .ok_or(EINVAL)?;
+        file_mapping
+            .page_cache
+            .with_page(PageOffset::from_byte_floor(file_offset), map_page)
+            .await?;
 
         attr.insert(PageAttribute::PRESENT);
         attr.remove(PageAttribute::MAPPED);
 
diff --git a/src/kernel/mem/mm_list/mapping.rs b/src/kernel/mem/mm_list/mapping.rs
index 5446ae42..2b837ae7 100644
--- a/src/kernel/mem/mm_list/mapping.rs
+++ b/src/kernel/mem/mm_list/mapping.rs
@@ -1,9 +1,12 @@
-use crate::kernel::vfs::inode::{Inode, InodeUse};
+use alloc::sync::Arc;
+
 use eonix_mm::paging::PAGE_SIZE;
 
+use crate::kernel::mem::PageCache;
+
 #[derive(Debug, Clone)]
 pub struct FileMapping {
-    pub file: InodeUse<dyn Inode>,
+    pub page_cache: Arc<PageCache>,
     /// Offset in the file, aligned to 4KB boundary.
     pub offset: usize,
     /// Length of the mapping. Exceeding part will be zeroed.
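The hunk above switches `FileMapping` from holding the inode to holding the page cache itself, so the fault path can reach cached pages without going back through the inode. A minimal construction sketch, assuming the `get_inode()`/`get_page_cache()` helpers this patch adds elsewhere (it mirrors the `do_mmap2()` change further down):

    // Hypothetical call site, modeled on do_mmap2() later in this patch.
    let inode = file.get_inode()?.ok_or(EBADF)?;
    let mapping = Mapping::File(FileMapping::new(
        inode.get_page_cache(), // cloned Arc<PageCache>
        pgoffset,               // byte offset into the file, page-aligned
        len,                    // length of the mapping in bytes
    ));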
@@ -19,10 +22,10 @@ pub enum Mapping {
 }
 
 impl FileMapping {
-    pub fn new(file: InodeUse<dyn Inode>, offset: usize, length: usize) -> Self {
+    pub fn new(page_cache: Arc<PageCache>, offset: usize, length: usize) -> Self {
         assert_eq!(offset & (PAGE_SIZE - 1), 0);
         Self {
-            file,
+            page_cache,
             offset,
             length,
         }
@@ -30,10 +33,10 @@ impl FileMapping {
 
     pub fn offset(&self, offset: usize) -> Self {
         if self.length <= offset {
-            Self::new(self.file.clone(), self.offset + self.length, 0)
+            Self::new(self.page_cache.clone(), self.offset + self.length, 0)
         } else {
             Self::new(
-                self.file.clone(),
+                self.page_cache.clone(),
                 self.offset + offset,
                 self.length - offset,
             )
 
diff --git a/src/kernel/mem/mm_list/page_fault.rs b/src/kernel/mem/mm_list/page_fault.rs
index 6f14583d..7aac141d 100644
--- a/src/kernel/mem/mm_list/page_fault.rs
+++ b/src/kernel/mem/mm_list/page_fault.rs
@@ -1,11 +1,12 @@
-use super::{MMList, VAddr};
-use crate::kernel::task::Thread;
 use eonix_hal::mm::flush_tlb;
 use eonix_hal::traits::fault::PageFaultErrorCode;
 use eonix_mm::address::{Addr as _, AddrOps as _, VRange};
 use eonix_mm::paging::PAGE_SIZE;
 use posix_types::signal::Signal;
 
+use super::{MMList, VAddr};
+use crate::kernel::task::Thread;
+
 #[repr(C)]
 struct FixEntry {
     start: u64,
 
diff --git a/src/kernel/mem/page_alloc.rs b/src/kernel/mem/page_alloc.rs
index 9dce4567..37344fc2 100644
--- a/src/kernel/mem/page_alloc.rs
+++ b/src/kernel/mem/page_alloc.rs
@@ -4,9 +4,9 @@ mod zones;
 use core::sync::atomic::Ordering;
 
 use buddy_allocator::BuddyAllocator;
-use eonix_mm::address::{AddrOps as _, PRange};
+use eonix_mm::address::PRange;
 use eonix_mm::paging::{
-    GlobalPageAlloc as GlobalPageAllocTrait, PageAlloc, PageList, PageListSized as _, PFN,
+    GlobalPageAlloc as GlobalPageAllocTrait, PageAlloc, PageList, PageListSized as _,
 };
 use eonix_preempt::PreemptGuard;
 use eonix_sync::{NoContext, Spin};
 
diff --git a/src/kernel/mem/page_alloc/raw_page.rs b/src/kernel/mem/page_alloc/raw_page.rs
index 0baa7b9a..074f82c7 100644
--- a/src/kernel/mem/page_alloc/raw_page.rs
+++ b/src/kernel/mem/page_alloc/raw_page.rs
@@ -32,15 +32,9 @@ impl SlabPageData {
     }
 }
 
-#[derive(Clone, Copy)]
-struct PageCacheData {
-    valid_size: usize,
-}
-
 #[repr(C)]
 union PageData {
     slab: SlabPageData,
-    page_cache: PageCacheData,
 }
 
 pub struct RawPage {
@@ -245,27 +239,16 @@ impl SlabPage for RawPage {
     }
 }
 
 impl PageCacheRawPage for RawPagePtr {
-    fn valid_size(&self) -> &mut usize {
-        unsafe {
-            // SAFETY: The caller ensures that the page is in some page cache.
-            &mut self.as_mut().shared_data.page_cache.valid_size
-        }
-    }
-
     fn is_dirty(&self) -> bool {
         self.flags().has(PageFlags::DIRTY)
     }
 
-    fn clear_dirty(&self) {
-        self.flags().clear(PageFlags::DIRTY);
-    }
-
-    fn set_dirty(&self) {
-        self.flags().set(PageFlags::DIRTY);
-    }
-
-    fn cache_init(&self) {
-        self.as_mut().shared_data.page_cache = PageCacheData { valid_size: 0 };
+    fn set_dirty(&self, dirty: bool) {
+        if dirty {
+            self.flags().set(PageFlags::DIRTY);
+        } else {
+            self.flags().clear(PageFlags::DIRTY);
+        }
     }
 }
 
diff --git a/src/kernel/mem/page_cache.rs b/src/kernel/mem/page_cache.rs
index 6a1c04ca..214c65a5 100644
--- a/src/kernel/mem/page_cache.rs
+++ b/src/kernel/mem/page_cache.rs
@@ -1,26 +1,27 @@
-use alloc::boxed::Box;
-use alloc::collections::btree_map::BTreeMap;
-use alloc::sync::Weak;
+use alloc::collections::btree_map::{BTreeMap, Entry};
 use core::future::Future;
 use core::mem::ManuallyDrop;
 
-use align_ext::AlignExt;
-use async_trait::async_trait;
 use eonix_hal::mm::ArchPhysAccess;
 use eonix_mm::address::{PAddr, PhysAccess};
 use eonix_mm::paging::{PageAlloc, RawPage, PAGE_SIZE, PAGE_SIZE_BITS, PFN};
 use eonix_sync::Mutex;
 
-use super::paging::AllocZeroed;
 use super::Page;
-use crate::io::{Buffer, FillResult, Stream};
+use crate::io::{Buffer, Stream};
+use crate::kernel::constants::EINVAL;
 use crate::kernel::mem::page_alloc::RawPagePtr;
+use crate::kernel::vfs::inode::InodeUse;
 use crate::prelude::KResult;
 use crate::GlobalPageAlloc;
 
+#[repr(transparent)]
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+pub struct PageOffset(usize);
+
 pub struct PageCache {
-    pages: Mutex<BTreeMap<usize, CachePage>>,
-    backend: Weak<dyn PageCacheBackend>,
+    pages: Mutex<BTreeMap<PageOffset, CachePage>>,
+    inode: InodeUse,
 }
 
 unsafe impl Send for PageCache {}
@@ -30,70 +31,46 @@ unsafe impl Sync for PageCache {}
 pub struct CachePage(RawPagePtr);
 
 unsafe impl Send for CachePage {}
+unsafe impl Sync for CachePage {}
 
-impl Buffer for CachePage {
-    fn total(&self) -> usize {
-        PAGE_SIZE
+impl PageOffset {
+    pub const fn from_byte_floor(offset: usize) -> Self {
+        Self(offset >> PAGE_SIZE_BITS)
     }
 
-    fn wrote(&self) -> usize {
-        self.valid_size()
+    pub const fn from_byte_ceil(offset: usize) -> Self {
+        Self((offset + PAGE_SIZE - 1) >> PAGE_SIZE_BITS)
     }
 
-    fn fill(&mut self, data: &[u8]) -> KResult<FillResult> {
-        let valid_size = self.valid_size();
-        let available = &mut self.all_mut()[valid_size..];
-        if available.len() == 0 {
-            return Ok(FillResult::Full);
-        }
-
-        let len = core::cmp::min(data.len(), available.len());
-        available[..len].copy_from_slice(&data[..len]);
+    pub fn iter_till(self, end: PageOffset) -> impl Iterator<Item = PageOffset> {
+        (self.0..end.0).map(PageOffset)
+    }
 
-        *self.0.valid_size() += len;
+    pub fn page_count(self) -> usize {
+        self.0
+    }
 
-        if len < data.len() {
-            Ok(FillResult::Partial(len))
-        } else {
-            Ok(FillResult::Done(len))
-        }
+    pub fn byte_count(self) -> usize {
+        self.page_count() * PAGE_SIZE
     }
 }
 
 impl CachePage {
     pub fn new() -> Self {
-        let page = GlobalPageAlloc.alloc().unwrap();
-        page.cache_init();
-        Self(page)
-    }
-
-    pub fn new_zeroed() -> Self {
-        let page = Page::zeroed();
-        let raw_page_ptr = RawPagePtr::from(page.into_raw());
-
-        raw_page_ptr.cache_init();
-        Self(raw_page_ptr)
-    }
-
-    pub fn valid_size(&self) -> usize {
-        *self.0.valid_size()
+        Self(GlobalPageAlloc.alloc().unwrap())
     }
 
-    pub fn set_valid_size(&mut self, valid_size: usize) {
-        *self.0.valid_size() = valid_size;
-    }
-
-    pub fn all(&self) -> &[u8] {
+    pub fn as_bytes(&self) -> &[u8] {
         unsafe {
             core::slice::from_raw_parts(
-                // SAFETY: The page is exclusively owned by us, so we can safely access its data.
+                // SAFETY: The page is owned by us, so we can safely access its data.
                 ArchPhysAccess::as_ptr(PAddr::from(PFN::from(self.0))).as_ptr(),
                 PAGE_SIZE,
             )
         }
     }
 
-    pub fn all_mut(&mut self) -> &mut [u8] {
+    pub fn as_bytes_mut(&mut self) -> &mut [u8] {
         unsafe {
             core::slice::from_raw_parts_mut(
                 // SAFETY: The page is exclusively owned by us, so we can safely access its data.
@@ -103,306 +80,171 @@ impl CachePage {
         }
     }
 
-    pub fn valid_data(&self) -> &[u8] {
-        &self.all()[..self.valid_size()]
-    }
-
     pub fn is_dirty(&self) -> bool {
         self.0.is_dirty()
     }
 
-    pub fn set_dirty(&self) {
-        self.0.set_dirty();
+    pub fn set_dirty(&self, dirty: bool) {
+        self.0.set_dirty(dirty);
     }
 
-    pub fn clear_dirty(&self) {
-        self.0.clear_dirty();
+    pub fn get_page(&self) -> Page {
+        unsafe { Page::with_raw(PFN::from(self.0), |page| page.clone()) }
     }
 }
 
 impl PageCache {
-    pub fn new(backend: Weak<dyn PageCacheBackend>) -> Self {
+    pub fn new(inode: InodeUse) -> Self {
         Self {
             pages: Mutex::new(BTreeMap::new()),
-            backend: backend,
+            inode,
         }
     }
 
-    pub async fn read(&self, buffer: &mut dyn Buffer, mut offset: usize) -> KResult<usize> {
-        let mut pages = self.pages.lock().await;
-        let size = self.backend.upgrade().unwrap().size();
-
-        loop {
-            if offset >= size {
-                break;
-            }
-            let page_id = offset >> PAGE_SIZE_BITS;
-            let page = pages.get(&page_id);
-
-            match page {
-                Some(page) => {
-                    let inner_offset = offset % PAGE_SIZE;
-                    let available_in_file = size.saturating_sub(offset);
-
-                    // TODO: still cause unnecessary IO if valid_size < PAGESIZE
-                    // and fill result is Done
-                    let page_data = &page.valid_data()[inner_offset..];
-                    let read_size = page_data.len().min(available_in_file);
-
-                    if read_size == 0
-                        || buffer.fill(&page_data[..read_size])?.should_stop()
-                        || buffer.available() == 0
-                    {
-                        break;
-                    }
-                    offset += read_size;
-                }
-                None => {
+    pub fn get_page_locked<'a>(
+        &self,
+        pages: &'a mut BTreeMap<PageOffset, CachePage>,
+        pgoff: PageOffset,
+    ) -> impl Future<Output = KResult<&'a mut CachePage>> + Send + use<'_, 'a> {
+        async move {
+            match pages.entry(pgoff) {
+                Entry::Occupied(ent) => Ok(ent.into_mut()),
+                Entry::Vacant(vacant_entry) => {
                     let mut new_page = CachePage::new();
-                    self.backend
-                        .upgrade()
-                        .unwrap()
-                        .read_page(&mut new_page, offset.align_down(PAGE_SIZE))
-                        .await?;
-                    pages.insert(page_id, new_page);
+                    self.inode.read_page(&mut new_page, pgoff).await?;
+
+                    Ok(vacant_entry.insert(new_page))
                 }
             }
         }
+    }
 
-        Ok(buffer.wrote())
+    fn len(&self) -> usize {
+        self.inode.info.lock().size as usize
     }
 
-    pub async fn write(&self, stream: &mut dyn Stream, mut offset: usize) -> KResult<usize> {
+    // TODO: Remove this.
+    pub async fn with_page(
+        &self,
+        pgoff: PageOffset,
+        func: impl FnOnce(&Page, &CachePage),
+    ) -> KResult<()> {
         let mut pages = self.pages.lock().await;
-        let old_size = self.backend.upgrade().unwrap().size();
-        let mut wrote = 0;
-
-        loop {
-            let page_id = offset >> PAGE_SIZE_BITS;
-            let page = pages.get_mut(&page_id);
-
-            match page {
-                Some(page) => {
-                    let inner_offset = offset % PAGE_SIZE;
-                    let cursor_end = match stream.poll_data(&mut page.all_mut()[inner_offset..])? {
-                        Some(buf) => {
-                            wrote += buf.len();
-                            inner_offset + buf.len()
-                        }
-                        None => {
-                            break;
-                        }
-                    };
-
-                    if page.valid_size() < cursor_end {
-                        page.set_valid_size(cursor_end);
-                    }
-                    page.set_dirty();
-                    offset += PAGE_SIZE - inner_offset;
-                }
-                None => {
-                    let new_page = if (offset >> PAGE_SIZE_BITS) > (old_size >> PAGE_SIZE_BITS) {
-                        let new_page = CachePage::new_zeroed();
-                        new_page
-                    } else {
-                        let mut new_page = CachePage::new();
-                        self.backend
-                            .upgrade()
-                            .unwrap()
-                            .read_page(&mut new_page, offset.align_down(PAGE_SIZE))
-                            .await?;
-                        new_page
-                    };
-
-                    pages.insert(page_id, new_page);
-                }
-            }
+        if pgoff > PageOffset::from_byte_ceil(self.len()) {
+            return Err(EINVAL);
         }
 
-        Ok(wrote)
-    }
+        let cache_page = self.get_page_locked(&mut pages, pgoff).await?;
 
-    pub async fn fsync(&self) -> KResult<()> {
-        let pages = self.pages.lock().await;
-        for (page_id, page) in pages.iter() {
-            if page.is_dirty() {
-                self.backend
-                    .upgrade()
-                    .unwrap()
-                    .write_page(&mut CachePageStream::new(*page), page_id << PAGE_SIZE_BITS)
-                    .await?;
-                page.clear_dirty();
-            }
+        unsafe {
+            let page = ManuallyDrop::new(Page::from_raw_unchecked(PFN::from(cache_page.0)));
+
+            func(&page, cache_page);
         }
+
         Ok(())
     }
 
-    // This function is used for extend write or truncate
-    pub async fn resize(&self, new_size: usize) -> KResult<()> {
+    pub async fn read(&self, buffer: &mut dyn Buffer, mut offset: usize) -> KResult<usize> {
         let mut pages = self.pages.lock().await;
-        let old_size = self.backend.upgrade().unwrap().size();
+        let total_len = self.len();
 
-        if new_size < old_size {
-            let begin = new_size.align_down(PAGE_SIZE) >> PAGE_SIZE_BITS;
-            let end = old_size.align_up(PAGE_SIZE) >> PAGE_SIZE_BITS;
+        if offset >= total_len {
+            return Ok(0);
+        }
 
-            for page_id in begin..end {
-                pages.remove(&page_id);
-            }
-        } else if new_size > old_size {
-            let begin = old_size.align_down(PAGE_SIZE) >> PAGE_SIZE_BITS;
-            let end = new_size.align_up(PAGE_SIZE) >> PAGE_SIZE_BITS;
+        let pgoff_start = PageOffset::from_byte_floor(offset);
+        let pgoff_end = PageOffset::from_byte_ceil(total_len);
 
-            pages.remove(&begin);
+        for pgoff in pgoff_start.iter_till(pgoff_end) {
+            let page = self.get_page_locked(&mut pages, pgoff).await?;
 
-            for page_id in begin..end {
-                let mut new_page = CachePage::new_zeroed();
+            let end_offset = (offset + PAGE_SIZE) / PAGE_SIZE * PAGE_SIZE;
+            let real_end = end_offset.min(total_len);
 
-                if page_id != end - 1 {
-                    new_page.set_valid_size(PAGE_SIZE);
-                } else {
-                    new_page.set_valid_size(new_size % PAGE_SIZE);
-                }
-                new_page.set_dirty();
-                pages.insert(page_id, new_page);
+            let inner_offset = offset % PAGE_SIZE;
+            let data_len = real_end - offset;
+
+            if buffer
+                .fill(&page.as_bytes()[inner_offset..inner_offset + data_len])?
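+                // Assumption: `should_stop()` reports that the buffer
+                // cannot take any more data, so we end the read loop early.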
+                .should_stop()
+                || buffer.available() == 0
+            {
+                break;
             }
+
+            offset = real_end;
         }
 
-        Ok(())
+        Ok(buffer.wrote())
     }
 
-    pub async fn with_page<F, O>(&self, offset: usize, func: F) -> KResult<Option<O>>
-    where
-        F: FnOnce(&Page, &CachePage) -> O,
-    {
-        let offset_aligin = offset.align_down(PAGE_SIZE);
-        let page_id = offset_aligin >> PAGE_SIZE_BITS;
-        let size = self.backend.upgrade().unwrap().size();
-
-        if offset_aligin > size {
-            return Ok(None);
-        }
-
+    pub async fn write(&self, stream: &mut dyn Stream, mut offset: usize) -> KResult<usize> {
         let mut pages = self.pages.lock().await;
+        let mut total_written = 0;
 
-        let raw_page_ptr = match pages.get(&page_id) {
-            Some(CachePage(raw_page_ptr)) => *raw_page_ptr,
-            None => {
-                let mut new_page = CachePage::new();
-                self.backend
-                    .upgrade()
-                    .unwrap()
-                    .read_page(&mut new_page, offset_aligin)
-                    .await?;
-                pages.insert(page_id, new_page);
-                new_page.0
+        loop {
+            let end_offset = (offset + PAGE_SIZE) / PAGE_SIZE * PAGE_SIZE;
+            let len = end_offset - offset;
+
+            // TODO: Rewrite to return a write state object.
+            let page = self
+                .inode
+                .write_begin(self, &mut pages, offset, len)
+                .await?;
+
+            let inner_offset = offset % PAGE_SIZE;
+            let written = stream
+                .poll_data(&mut page.as_bytes_mut()[inner_offset..])?
+                .map(|b| b.len())
+                .unwrap_or(0);
+
+            page.set_dirty(true);
+            self.inode
+                .write_end(self, &mut pages, offset, len, written)
+                .await?;
+
+            if written == 0 {
+                break;
             }
-        };
-
-        unsafe {
-            let page = ManuallyDrop::new(Page::from_raw_unchecked(PFN::from(raw_page_ptr)));
-            Ok(Some(func(&page, &CachePage(raw_page_ptr))))
+            total_written += written;
+            offset += written;
         }
-    }
-}
 
-pub struct CachePageStream {
-    page: CachePage,
-    cur: usize,
-}
-
-impl CachePageStream {
-    pub fn new(page: CachePage) -> Self {
-        Self { page, cur: 0 }
+        Ok(total_written)
     }
-}
 
-impl Stream for CachePageStream {
-    fn poll_data<'a>(&mut self, buf: &'a mut [u8]) -> KResult<Option<&'a mut [u8]>> {
-        if self.cur >= self.page.valid_size() {
-            return Ok(None);
-        }
-
-        let page_data = &self.page.all()[self.cur..self.page.valid_size()];
-        let to_read = buf.len().min(page_data.len());
-        buf[..to_read].copy_from_slice(&page_data[..to_read]);
+    pub async fn fsync(&self) -> KResult<()> {
+        let mut pages = self.pages.lock().await;
 
-        self.cur += to_read;
+        for (&pgoff, page) in pages.iter_mut() {
+            if !page.is_dirty() {
+                continue;
+            }
 
-        Ok(Some(&mut buf[..to_read]))
-    }
-
-    fn ignore(&mut self, len: usize) -> KResult<Option<usize>> {
-        if self.cur >= self.page.valid_size() {
-            return Ok(None);
+            self.inode.write_page(page, pgoff).await?;
+            page.set_dirty(false);
         }
-        let to_ignore = len.min(self.page.valid_size() - self.cur);
-        self.cur += to_ignore;
 
-        Ok(Some(to_ignore))
+        Ok(())
     }
 }
 
-// with this trait, "page cache" and "block cache" are unified,
-// for fs, offset is file offset (floor algin to PAGE_SIZE)
-// for blkdev, offset is block idx (floor align to PAGE_SIZE / BLK_SIZE)
-// Oh no, this would make unnecessary cache
-pub trait PageCacheBackendOps: Sized {
-    fn read_page(
-        &self,
-        page: &mut CachePage,
-        offset: usize,
-    ) -> impl Future<Output = KResult<usize>> + Send;
-
-    fn write_page(
-        &self,
-        page: &mut CachePageStream,
-        offset: usize,
-    ) -> impl Future<Output = KResult<usize>> + Send;
-
-    fn size(&self) -> usize;
-}
-
-#[async_trait]
-pub trait PageCacheBackend: Send + Sync {
-    async fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult<usize>;
-    async fn write_page(&self, page: &mut CachePageStream, offset: usize) -> KResult<usize>;
-    fn size(&self) -> usize;
-}
-
-#[async_trait]
-impl<T> PageCacheBackend for T
-where
-    T: PageCacheBackendOps + Send + Sync + 'static,
-{
-    async fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult<usize> {
-        self.read_page(page, offset).await
-    }
-
-    async fn write_page(&self, page: &mut CachePageStream, offset: usize) -> KResult<usize> {
-        self.write_page(page, offset).await
-    }
-
-    fn size(&self) -> usize {
-        self.size()
+impl core::fmt::Debug for PageCache {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        f.debug_struct("PageCache").finish()
     }
 }
 
 pub trait PageCacheRawPage: RawPage {
-    fn valid_size(&self) -> &mut usize;
-
     fn is_dirty(&self) -> bool;
 
-    fn set_dirty(&self);
-
-    fn clear_dirty(&self);
-
-    fn cache_init(&self);
+    fn set_dirty(&self, dirty: bool);
 }
 
 impl Drop for PageCache {
     fn drop(&mut self) {
-        let _ = self.fsync();
+        // TODO: Write back dirty pages...
+        // let _ = self.fsync();
     }
 }
 
diff --git a/src/kernel/syscall/mm.rs b/src/kernel/syscall/mm.rs
index 4cb7908c..825440ef 100644
--- a/src/kernel/syscall/mm.rs
+++ b/src/kernel/syscall/mm.rs
@@ -1,20 +1,15 @@
-use super::FromSyscallArg;
-use crate::kernel::constants::{EBADF, EINVAL};
-use crate::kernel::mem::FileMapping;
-use crate::kernel::task::Thread;
-use crate::kernel::vfs::filearray::FD;
-use crate::{
-    kernel::{
-        constants::{UserMmapFlags, UserMmapProtocol},
-        mem::{Mapping, Permission},
-    },
-    prelude::*,
-};
 use align_ext::AlignExt;
 use eonix_mm::address::{Addr as _, AddrOps as _, VAddr};
 use eonix_mm::paging::PAGE_SIZE;
 use posix_types::syscall_no::*;
 
+use super::FromSyscallArg;
+use crate::kernel::constants::{UserMmapFlags, UserMmapProtocol, EBADF, EINVAL};
+use crate::kernel::mem::{FileMapping, Mapping, Permission};
+use crate::kernel::task::Thread;
+use crate::kernel::vfs::filearray::FD;
+use crate::prelude::*;
+
 impl FromSyscallArg for UserMmapProtocol {
     fn from_arg(value: usize) -> UserMmapProtocol {
         UserMmapProtocol::from_bits_truncate(value as u32)
@@ -74,7 +69,7 @@ async fn do_mmap2(
             .get_inode()?
             .ok_or(EBADF)?;
 
-        Mapping::File(FileMapping::new(file, pgoffset, len))
+        Mapping::File(FileMapping::new(file.get_page_cache(), pgoffset, len))
     };
 
     let permission = Permission {
 
diff --git a/src/kernel/task/loader/elf.rs b/src/kernel/task/loader/elf.rs
index 36d139a8..9f8aa166 100644
--- a/src/kernel/task/loader/elf.rs
+++ b/src/kernel/task/loader/elf.rs
@@ -1,27 +1,22 @@
+use alloc::ffi::CString;
+use alloc::sync::Arc;
+use alloc::vec::Vec;
+
+use align_ext::AlignExt;
+use eonix_mm::address::{Addr, AddrOps as _, VAddr};
+use eonix_mm::paging::PAGE_SIZE;
+use xmas_elf::header::{self, Class, HeaderPt1, Machine_};
+use xmas_elf::program::{self, ProgramHeader32, ProgramHeader64};
+
 use super::{LoadInfo, ELF_MAGIC};
-use crate::io::UninitBuffer;
+use crate::io::{ByteBuffer, UninitBuffer};
+use crate::kernel::constants::ENOEXEC;
+use crate::kernel::mem::{FileMapping, MMList, Mapping, Permission};
 use crate::kernel::task::loader::aux_vec::{AuxKey, AuxVec};
+use crate::kernel::vfs::dentry::Dentry;
+use crate::kernel::vfs::FsContext;
 use crate::path::Path;
-use crate::{
-    io::ByteBuffer,
-    kernel::{
-        constants::ENOEXEC,
-        mem::{FileMapping, MMList, Mapping, Permission},
-        vfs::{dentry::Dentry, FsContext},
-    },
-    prelude::*,
-};
-use align_ext::AlignExt;
-use alloc::vec::Vec;
-use alloc::{ffi::CString, sync::Arc};
-use eonix_mm::{
-    address::{Addr, AddrOps as _, VAddr},
-    paging::PAGE_SIZE,
-};
-use xmas_elf::{
-    header::{self, Class, HeaderPt1, Machine_},
-    program::{self, ProgramHeader32, ProgramHeader64},
-};
+use crate::prelude::*;
 
 const INIT_STACK_SIZE: usize = 0x80_0000;
 
@@ -366,7 +361,7 @@ impl Elf {
                     vmap_start,
                     file_len,
                     Mapping::File(FileMapping::new(
-                        self.file.get_inode()?,
+                        self.file.get_inode()?.get_page_cache(),
                         file_offset,
                         real_file_length,
                     )),
@@ -376,16 +371,27 @@ impl Elf {
                 .await?;
         }
 
-        if vmem_len > file_len {
-            mm_list
-                .mmap_fixed(
-                    vmap_start + file_len,
-                    vmem_len - file_len,
-                    Mapping::Anonymous,
-                    permission,
-                    false,
-                )
-                .await?;
+        if vmem_vaddr_end > load_vaddr_end {
+            if load_vaddr_end.page_offset() != 0 {
+                let mut zero_len = PAGE_SIZE - load_vaddr_end.page_offset();
+                zero_len = zero_len.min(vmem_vaddr_end - load_vaddr_end);
+
+                mm_list
+                    .access_mut(load_vaddr_end, zero_len, |_, data| data.fill(0))
+                    .await?;
+            }
+
+            if vmem_len - file_len > 0 {
+                mm_list
+                    .mmap_fixed(
+                        vmap_start + file_len,
+                        vmem_len - file_len,
+                        Mapping::Anonymous,
+                        permission,
+                        false,
+                    )
+                    .await?;
+            }
         }
 
         Ok(vmap_start + vmem_len)
 
diff --git a/src/kernel/vfs/dentry.rs b/src/kernel/vfs/dentry.rs
index c1eb8cb8..22760de9 100644
--- a/src/kernel/vfs/dentry.rs
+++ b/src/kernel/vfs/dentry.rs
@@ -1,35 +1,31 @@
 pub mod dcache;
 mod walk;
 
-use core::{
-    cell::UnsafeCell,
-    fmt,
-    hash::{BuildHasher, BuildHasherDefault, Hasher},
-    sync::atomic::{AtomicPtr, AtomicU64, AtomicU8, Ordering},
-};
-
 use alloc::sync::Arc;
+use core::cell::UnsafeCell;
+use core::fmt;
+use core::hash::{BuildHasher, BuildHasherDefault, Hasher};
+use core::sync::atomic::{AtomicPtr, AtomicU64, AtomicU8, Ordering};
+
 use arcref::AsArcRef;
 use eonix_sync::LazyLock;
 use pointers::BorrowedArc;
-use posix_types::{namei::RenameFlags, open::OpenFlags, result::PosixError, stat::StatX};
-
-use crate::{
-    hash::KernelHasher,
-    io::Buffer,
-    io::Stream,
-    kernel::constants::{EEXIST, EINVAL, EISDIR, ELOOP, ENOENT, EPERM, ERANGE},
-    kernel::{block::BlockDevice, CharDevice},
-    path::Path,
-    prelude::*,
-    rcu::{rcu_read_lock, RCUNode, RCUPointer, RCUReadGuard},
-};
-
-use super::{
-    inode::{Ino, Inode, InodeUse, RenameData, WriteOffset},
-    types::{DeviceId, Format, Mode, Permission},
-    FsContext,
-};
+use posix_types::namei::RenameFlags;
+use posix_types::open::OpenFlags;
+use posix_types::result::PosixError;
+use posix_types::stat::StatX;
+
+use super::inode::{Ino, InodeUse, RenameData, WriteOffset};
+use super::types::{DeviceId, Format, Mode, Permission};
+use super::FsContext;
+use crate::hash::KernelHasher;
+use crate::io::{Buffer, Stream};
+use crate::kernel::block::BlockDevice;
+use crate::kernel::constants::{EEXIST, EINVAL, EISDIR, ELOOP, ENOENT, EPERM, ERANGE};
+use crate::kernel::CharDevice;
+use crate::path::Path;
+use crate::prelude::*;
+use crate::rcu::{rcu_read_lock, RCUNode, RCUPointer, RCUReadGuard};
 
 const D_INVALID: u8 = 0;
 const D_REGULAR: u8 = 1;
@@ -56,7 +52,7 @@ enum DentryKind {
 /// [lookup()]: crate::kernel::vfs::inode::InodeDirOps::lookup
 struct AssociatedInode {
     kind: UnsafeCell<Option<DentryKind>>,
-    inode: UnsafeCell<Option<InodeUse<dyn Inode>>>,
+    inode: UnsafeCell<Option<InodeUse>>,
 }
 
 /// # Safety
@@ -181,15 +177,15 @@ impl Dentry {
             .map_or(core::ptr::null(), |parent| Arc::as_ptr(&parent))
     }
 
-    pub fn fill(&self, file: InodeUse<dyn Inode>) {
+    pub fn fill(&self, file: InodeUse) {
         self.inode.store(file);
     }
 
-    pub fn inode(&self) -> Option<InodeUse<dyn Inode>> {
+    pub fn inode(&self) -> Option<InodeUse> {
         self.inode.load().map(|(_, inode)| inode.clone())
     }
 
-    pub fn get_inode(&self) -> KResult<InodeUse<dyn Inode>> {
+    pub fn get_inode(&self) -> KResult<InodeUse> {
         self.inode().ok_or(ENOENT)
     }
 
@@ -291,7 +287,7 @@ impl Dentry {
         let inode = self.get_inode()?;
         // Safety: Changing mode alone will have no effect on the file's contents
-        match inode.format() {
+        match inode.format {
             Format::DIR => Err(EISDIR),
             Format::REG => inode.read(buffer, offset).await,
             Format::BLK => {
@@ -309,7 +305,7 @@ impl Dentry {
     pub async fn write(&self, stream: &mut dyn Stream, offset: WriteOffset<'_>) -> KResult<usize> {
         let inode = self.get_inode()?;
         // Safety: Changing mode alone will have no effect on the file's contents
-        match inode.format() {
+        match inode.format {
             Format::DIR => Err(EISDIR),
             Format::REG => inode.write(stream, offset).await,
             Format::BLK => Err(EINVAL), // TODO
@@ -375,7 +371,7 @@ impl Dentry {
     }
 
     pub async fn chmod(&self, mode: Mode) -> KResult<()> {
-        self.get_inode()?.chmod(mode).await
+        self.get_inode()?.chmod(mode.perm()).await
     }
 
     pub async fn chown(&self, uid: u32, gid: u32) -> KResult<()> {
@@ -438,8 +434,8 @@ impl AssociatedInode {
         }
     }
 
-    fn store(&self, inode: InodeUse<dyn Inode>) {
-        let kind = match inode.format() {
+    fn store(&self, inode: InodeUse) {
+        let kind = match inode.format {
             Format::REG | Format::BLK | Format::CHR => DentryKind::Regular,
             Format::DIR => DentryKind::Directory,
             Format::LNK => DentryKind::Symlink,
@@ -463,7 +459,7 @@ impl AssociatedInode {
         DentryKind::atomic_acq(&self.kind)
     }
 
-    fn load(&self) -> Option<(DentryKind, &InodeUse<dyn Inode>)> {
+    fn load(&self) -> Option<(DentryKind, &InodeUse)> {
         self.kind().map(|kind| unsafe {
             let inode = (&*self.inode.get())
                 .as_ref()
 
diff --git a/src/kernel/vfs/dentry/walk.rs b/src/kernel/vfs/dentry/walk.rs
index 3e401b4b..7b1060ac 100644
--- a/src/kernel/vfs/dentry/walk.rs
+++ b/src/kernel/vfs/dentry/walk.rs
@@ -1,33 +1,23 @@
-use core::{
-    future::Future,
-    hash::{BuildHasher, BuildHasherDefault, Hasher},
-    ops::Deref,
-    pin::Pin,
-};
-
-use alloc::{boxed::Box, sync::Arc};
+use alloc::boxed::Box;
+use alloc::sync::Arc;
+use core::future::Future;
+use core::hash::{BuildHasher, BuildHasherDefault, Hasher};
+use core::ops::Deref;
+use core::pin::Pin;
+
 use arcref::{ArcRef, AsArcRef};
 use posix_types::result::PosixError;
 
-use crate::{
-    hash::KernelHasher,
-    io::ByteBuffer,
-    kernel::{
-        constants::ELOOP,
-        vfs::{
-            inode::{Inode, InodeUse},
-            FsContext,
-        },
-    },
-    path::{Path, PathComponent, PathIterator},
-    prelude::KResult,
-    rcu::{rcu_read_lock, RCUReadLock},
-};
-
-use super::{
-    dcache::{self, DCacheItem},
-    Dentry, DentryKind,
-};
+use super::dcache::{self, DCacheItem};
+use super::{Dentry, DentryKind};
+use crate::hash::KernelHasher;
+use crate::io::ByteBuffer;
+use crate::kernel::constants::ELOOP;
+use crate::kernel::vfs::inode::InodeUse;
+use crate::kernel::vfs::FsContext;
+use crate::path::{Path, PathComponent, PathIterator};
+use crate::prelude::KResult;
+use crate::rcu::{rcu_read_lock, RCUReadLock};
 
 struct DentryFind<'a, 'b> {
     parent: &'a Dentry,
@@ -40,7 +30,7 @@ pub enum WalkResultRcu<'rcu, 'path> {
     Ok(ArcRef<'rcu, Dentry>),
     Symlink {
         symlink: ArcRef<'rcu, Dentry>,
-        inode: InodeUse<dyn Inode>,
+        inode: InodeUse,
     },
     Miss {
         parent: ArcRef<'rcu, Dentry>,
@@ -53,7 +43,7 @@ pub enum WalkResult {
     Ok(Arc<Dentry>),
     Symlink {
         symlink: Arc<Dentry>,
-        inode: InodeUse<dyn Inode>,
+        inode: InodeUse,
     },
 }
 
@@ -270,7 +260,7 @@ impl FsContext {
     pub async fn follow_symlink(
         &self,
         symlink: ArcRef<'_, Dentry>,
-        inode: &InodeUse<dyn Inode>,
+        inode: &InodeUse,
         nr_follows: u32,
     ) -> KResult<Arc<Dentry>> {
         let mut target = [0; 256];
@@ -288,7 +278,7 @@ impl FsContext {
     fn follow_symlink_boxed<'r, 'a: 'r, 'b: 'r, 'c: 'r>(
         &'a self,
         symlink: ArcRef<'b, Dentry>,
-        inode: &'c InodeUse<dyn Inode>,
+        inode: &'c InodeUse,
         nr_follows: u32,
     ) -> Pin<Box<dyn Future<Output = KResult<Arc<Dentry>>> + Send + 'r>> {
         Box::pin(self.follow_symlink(symlink, inode, nr_follows))
 
diff --git a/src/kernel/vfs/file/inode_file.rs b/src/kernel/vfs/file/inode_file.rs
index 96526ee9..d302079c 100644
--- a/src/kernel/vfs/file/inode_file.rs
+++ b/src/kernel/vfs/file/inode_file.rs
@@ -1,23 +1,17 @@
-use super::{File, FileType, SeekOption};
-use crate::{
-    io::{Buffer, BufferFill, Stream},
-    kernel::{
-        constants::{EBADF, EFAULT, ENOTDIR, EOVERFLOW, ESPIPE},
-        vfs::{
-            dentry::Dentry,
-            inode::{Inode, InodeUse, WriteOffset},
-            types::Format,
-        },
-    },
-    prelude::KResult,
-};
 use alloc::sync::Arc;
+
 use eonix_sync::Mutex;
-use posix_types::{
-    getdent::{UserDirent, UserDirent64},
-    open::OpenFlags,
-    stat::StatX,
-};
+use posix_types::getdent::{UserDirent, UserDirent64};
+use posix_types::open::OpenFlags;
+use posix_types::stat::StatX;
+
+use super::{File, FileType, SeekOption};
+use crate::io::{Buffer, BufferFill, Stream};
+use crate::kernel::constants::{EBADF, EFAULT, ENOTDIR, EOVERFLOW, ESPIPE};
+use crate::kernel::vfs::dentry::Dentry;
+use crate::kernel::vfs::inode::{InodeUse, WriteOffset};
+use crate::kernel::vfs::types::Format;
+use crate::prelude::KResult;
 
 pub struct InodeFile {
     pub r: bool,
@@ -34,7 +28,7 @@ impl InodeFile {
     pub fn new(dentry: Arc<Dentry>, flags: OpenFlags) -> File {
         // SAFETY: `dentry` used to create `InodeFile` is valid.
         // SAFETY: `mode` should never change with respect to the `S_IFMT` fields.
-        let format = dentry.inode().expect("dentry should be invalid").format();
+        let format = dentry.inode().expect("dentry should be valid").format;
 
         let (r, w, a) = flags.as_rwa();
 
@@ -98,7 +92,7 @@ impl InodeFile {
 }
 
 impl File {
-    pub fn get_inode(&self) -> KResult<Option<InodeUse<dyn Inode>>> {
+    pub fn get_inode(&self) -> KResult<Option<InodeUse>> {
         if let FileType::Inode(inode_file) = &**self {
             Ok(Some(inode_file.dentry.get_inode()?))
         } else {
@@ -191,7 +185,7 @@ impl File {
             SeekOption::Set(n) => n,
             SeekOption::End(off) => {
                 let inode = inode_file.dentry.get_inode()?;
-                let size = inode.info().lock().size as usize;
+                let size = inode.info.lock().size as usize;
                 size.checked_add_signed(off).ok_or(EOVERFLOW)?
             }
         };
 
diff --git a/src/kernel/vfs/filearray.rs b/src/kernel/vfs/filearray.rs
index 1862a3e1..609d969c 100644
--- a/src/kernel/vfs/filearray.rs
+++ b/src/kernel/vfs/filearray.rs
@@ -1,28 +1,23 @@
-use super::{
-    file::{File, InodeFile, Pipe},
-    types::{Format, Permission},
-    Spin, TerminalFile,
-};
-use crate::kernel::{
-    constants::{
-        EBADF, EISDIR, ENOTDIR, F_DUPFD, F_DUPFD_CLOEXEC, F_GETFD, F_GETFL, F_SETFD, F_SETFL,
-    },
-    syscall::{FromSyscallArg, SyscallRetVal},
-};
-use crate::{
-    kernel::{console::get_console, constants::ENXIO, vfs::dentry::Dentry, CharDevice},
-    prelude::*,
-};
 use alloc::sync::Arc;
-use intrusive_collections::{
-    intrusive_adapter, rbtree::Entry, Bound, KeyAdapter, RBTree, RBTreeAtomicLink,
-};
-use itertools::{
-    FoldWhile::{Continue, Done},
-    Itertools,
-};
+
+use intrusive_collections::rbtree::Entry;
+use intrusive_collections::{intrusive_adapter, Bound, KeyAdapter, RBTree, RBTreeAtomicLink};
+use itertools::FoldWhile::{Continue, Done};
+use itertools::Itertools;
 use posix_types::open::{FDFlags, OpenFlags};
 
+use super::file::{File, InodeFile, Pipe};
+use super::types::{Format, Permission};
+use super::{Spin, TerminalFile};
+use crate::kernel::console::get_console;
+use crate::kernel::constants::{
+    EBADF, EISDIR, ENOTDIR, ENXIO, F_DUPFD, F_DUPFD_CLOEXEC, F_GETFD, F_GETFL, F_SETFD, F_SETFL,
+};
+use crate::kernel::syscall::{FromSyscallArg, SyscallRetVal};
+use crate::kernel::vfs::dentry::Dentry;
+use crate::kernel::CharDevice;
+use crate::prelude::*;
+
 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
 pub struct FD(u32);
 
@@ -291,20 +286,19 @@ impl FileArray {
         let fdflag = flags.as_fd_flags();
 
         let inode = dentry.get_inode()?;
-        let file_format = inode.format();
 
-        match (flags.directory(), file_format, flags.write()) {
+        match (flags.directory(), inode.format, flags.write()) {
             (true, Format::DIR, _) => {}
             (true, _, _) => return Err(ENOTDIR),
             (false, Format::DIR, true) => return Err(EISDIR),
             _ => {}
         }
 
-        if flags.truncate() && flags.write() && file_format == Format::REG {
+        if flags.truncate() && flags.write() && inode.format == Format::REG {
             inode.truncate(0).await?;
         }
 
-        let file = if file_format == Format::CHR {
+        let file = if inode.format == Format::CHR {
             let device = CharDevice::get(inode.devid()?).ok_or(ENXIO)?;
             device.open(flags)?
         } else {
 
diff --git a/src/kernel/vfs/inode/inode.rs b/src/kernel/vfs/inode/inode.rs
index 870a045d..5f0b98c2 100644
--- a/src/kernel/vfs/inode/inode.rs
+++ b/src/kernel/vfs/inode/inode.rs
@@ -1,52 +1,149 @@
 use alloc::boxed::Box;
-use core::{
-    any::Any,
-    future::Future,
-    marker::Unsize,
-    ops::{CoerceUnsized, Deref},
-    pin::Pin,
-};
-use eonix_sync::Spin;
-
+use alloc::collections::btree_map::BTreeMap;
 use alloc::sync::{Arc, Weak};
-use async_trait::async_trait;
+use core::any::Any;
+use core::future::Future;
+use core::ops::Deref;
 
-use crate::{
-    io::{Buffer, Stream},
-    kernel::{
-        constants::{EINVAL, EPERM},
-        mem::PageCache,
-        timer::Instant,
-        vfs::{
-            dentry::Dentry,
-            types::{DeviceId, Format, Mode, Permission},
-            SbRef, SbUse, SuperBlock,
-        },
-    },
-    prelude::KResult,
-};
+use async_trait::async_trait;
+use eonix_sync::{RwLock, Spin};
 
 use super::{Ino, RenameData, WriteOffset};
+use crate::io::{Buffer, Stream};
+use crate::kernel::constants::{EINVAL, EPERM};
+use crate::kernel::mem::{CachePage, PageCache, PageOffset};
+use crate::kernel::timer::Instant;
+use crate::kernel::vfs::dentry::Dentry;
+use crate::kernel::vfs::types::{DeviceId, Format, Mode, Permission};
+use crate::kernel::vfs::{SbRef, SbUse, SuperBlock};
+use crate::prelude::KResult;
+
+pub struct Inode {
+    pub ino: Ino,
+    pub format: Format,
+    pub info: Spin<InodeInfo>,
+    pub rwsem: RwLock<()>,
+    page_cache: Spin<Weak<PageCache>>,
+    sb: SbRef,
+    ops: Box<dyn InodeOpsErased>,
+}
 
-pub trait InodeOps: Sized + Send + Sync + 'static {
-    type SuperBlock: SuperBlock + Sized;
-
-    fn ino(&self) -> Ino;
-    fn format(&self) -> Format;
-    fn info(&self) -> &Spin<InodeInfo>;
-
-    fn super_block(&self) -> &SbRef;
+macro_rules! return_type {
+    ($type:ty) => {
+        $type
+    };
+    () => {
+        ()
+    };
+}
 
-    fn page_cache(&self) -> Option<&PageCache>;
+macro_rules! define_inode_ops {
+    {
+        $(
+            $(#[$attr:meta])*
+            async fn $method:ident $(<$($lt:lifetime),+>)? (&self $(,)? $($name:ident : $type:ty $(,)?)*) $(-> $ret:ty)?
+            $body:block
+        )*
+
+        ---
+
+        $(
+            $(#[$attr1:meta])*
+            fn $method1:ident $(<$($lt1:lifetime),+>)? (&self $(,)? $($name1:ident : $type1:ty $(,)?)*) $(-> $ret1:ty)?
+            $body1:block
+        )*
+    } => {
+        #[allow(unused_variables)]
+        pub trait InodeOps: Sized + Send + Sync + 'static {
+            type SuperBlock: SuperBlock + Sized;
+
+            $(
+            $(#[$attr])*
+            fn $method $(<$($lt),+>)? (
+                &self,
+                sb: SbUse,
+                inode: &InodeUse,
+                $($name : $type),*
+            ) -> impl Future<Output = return_type!($($ret)?)> + Send {
+                async { $body }
+            })*
+
+            $(
+            $(#[$attr1])*
+            fn $method1 $(<$($lt1),+>)? (
+                &self,
+                sb: SbUse,
+                inode: &InodeUse,
+                $($name1 : $type1),*
+            ) -> return_type!($($ret1)?) {
+                $body1
+            })*
+        }
+
+        #[async_trait]
+        trait InodeOpsErased: Any + Send + Sync + 'static {
+            $(async fn $method $(<$($lt),+>)? (
+                &self,
+                sb: SbUse,
+                inode: &InodeUse,
+                $($name : $type),*
+            ) -> return_type!($($ret)?);)*
+
+            $(fn $method1 $(<$($lt1),+>)? (
+                &self,
+                sb: SbUse,
+                inode: &InodeUse,
+                $($name1 : $type1),*
+            ) -> return_type!($($ret1)?);)*
+        }
+
+        #[async_trait]
+        impl<T> InodeOpsErased for T
+        where
+            T: InodeOps,
+        {
+            $(async fn $method $(<$($lt),+>)? (
+                &self,
+                sb: SbUse,
+                inode: &InodeUse,
+                $($name : $type),*
+            ) -> return_type!($($ret)?) {
+                self.$method(sb.downcast(), inode, $($name),*).await
+            })*
+
+            $(fn $method1 $(<$($lt1),+>)? (
+                &self,
+                sb: SbUse,
+                inode: &InodeUse,
+                $($name1 : $type1),*
+            ) -> return_type!($($ret1)?) {
+                self.$method1(sb.downcast(), inode, $($name1),*)
+            })*
+        }
+
+        impl InodeUse {
+            $(pub async fn $method $(<$($lt),+>)? (
+                &self,
+                $($name : $type),*
+            ) -> return_type!($($ret)?) {
+                self.ops.$method(self.sbget()?, self, $($name),*).await
+            })*
+
+            $(pub fn $method1 $(<$($lt1),+>)? (
+                &self,
+                $($name1 : $type1),*
+            ) -> return_type!($($ret1)?) {
+                self.ops.$method1(self.sbget()?, self, $($name1),*)
+            })*
+        }
+    };
 }
 
-#[allow(unused_variables)]
-pub trait InodeDirOps: InodeOps {
-    fn lookup(
-        &self,
-        dentry: &Arc<Dentry>,
-    ) -> impl Future<Output = KResult<Option<InodeUse<dyn Inode>>>> + Send {
-        async { Err(EPERM) }
+define_inode_ops! {
+    // DIRECTORY OPERATIONS
+
+    async fn lookup(&self, dentry: &Arc<Dentry>) -> KResult<Option<InodeUse>> {
+        Err(EPERM)
     }
 
     /// Read directory entries and call the given closure for each entry.
@@ -55,255 +152,114 @@ pub trait InodeDirOps: InodeOps {
     /// - Ok(count): The number of entries read.
     /// - Ok(Err(err)): Some error occurred while calling the given closure.
     /// - Err(err): An error occurred while reading the directory.
-    fn readdir<'r, 'a: 'r, 'b: 'r>(
-        &'a self,
+    async fn readdir(
+        &self,
         offset: usize,
-        for_each_entry: &'b mut (dyn FnMut(&[u8], Ino) -> KResult<bool> + Send),
-    ) -> impl Future<Output = KResult<Result<usize, PosixError>>> + Send + 'r {
-        async { Err(EPERM) }
+        for_each_entry: &mut (dyn (for<'a> FnMut(&'a [u8], Ino) -> KResult<bool>) + Send),
+    ) -> KResult<Result<usize, PosixError>> {
+        Err(EPERM)
     }
 
-    fn create(
-        &self,
-        at: &Arc<Dentry>,
-        mode: Permission,
-    ) -> impl Future<Output = KResult<()>> + Send {
-        async { Err(EPERM) }
+    async fn create(&self, at: &Arc<Dentry>, mode: Permission) -> KResult<()> {
+        Err(EPERM)
    }
 
-    fn mkdir(&self, at: &Dentry, mode: Permission) -> impl Future<Output = KResult<()>> + Send {
-        async { Err(EPERM) }
+    async fn mkdir(&self, at: &Dentry, mode: Permission) -> KResult<()> {
+        Err(EPERM)
     }
 
-    fn mknod(
-        &self,
-        at: &Dentry,
-        mode: Mode,
-        dev: DeviceId,
-    ) -> impl Future<Output = KResult<()>> + Send {
-        async { Err(EPERM) }
+    async fn mknod(&self, at: &Dentry, mode: Mode, dev: DeviceId) -> KResult<()> {
+        Err(EPERM)
     }
 
-    fn unlink(&self, at: &Arc<Dentry>) -> impl Future<Output = KResult<()>> + Send {
-        async { Err(EPERM) }
+    async fn unlink(&self, at: &Arc<Dentry>) -> KResult<()> {
+        Err(EPERM)
     }
 
-    fn symlink(&self, at: &Arc<Dentry>, target: &[u8]) -> impl Future<Output = KResult<()>> + Send {
-        async { Err(EPERM) }
+    async fn symlink(&self, at: &Arc<Dentry>, target: &[u8]) -> KResult<()> {
+        Err(EPERM)
     }
 
-    fn rename(&self, rename_data: RenameData<'_, '_>) -> impl Future<Output = KResult<()>> + Send {
-        async { Err(EPERM) }
+    async fn rename(&self, rename_data: RenameData<'_, '_>) -> KResult<()> {
+        Err(EPERM)
     }
-}
 
-#[allow(unused_variables)]
-pub trait InodeFileOps: InodeOps {
-    fn read(
-        &self,
-        buffer: &mut dyn Buffer,
-        offset: usize,
-    ) -> impl Future<Output = KResult<usize>> + Send {
-        async { Err(EINVAL) }
+    // FILE OPERATIONS
+
+    async fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
+        Err(EINVAL)
     }
 
-    fn read_direct(
-        &self,
-        buffer: &mut dyn Buffer,
-        offset: usize,
-    ) -> impl Future<Output = KResult<usize>> + Send {
-        async { Err(EINVAL) }
+    async fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
+        Err(EINVAL)
    }
 
-    fn write(
+    async fn write(
         &self,
         stream: &mut dyn Stream,
-        offset: WriteOffset<'_>,
-    ) -> impl Future<Output = KResult<usize>> + Send {
-        async { Err(EINVAL) }
+        offset: WriteOffset<'_>
+    ) -> KResult<usize> {
+        Err(EINVAL)
     }
 
-    fn write_direct(
+    async fn write_direct(
         &self,
         stream: &mut dyn Stream,
         offset: usize,
-    ) -> impl Future<Output = KResult<usize>> + Send {
-        async { Err(EINVAL) }
-    }
-
-    fn devid(&self) -> KResult<DeviceId> {
+    ) -> KResult<usize> {
         Err(EINVAL)
     }
 
-    fn readlink(&self, buffer: &mut dyn Buffer) -> impl Future<Output = KResult<usize>> + Send {
-        async { Err(EINVAL) }
-    }
-
-    fn truncate(&self, length: usize) -> impl Future<Output = KResult<()>> + Send {
-        async { Err(EPERM) }
-    }
-
-    fn chmod(&self, perm: Permission) -> impl Future<Output = KResult<()>> + Send {
-        async { Err(EPERM) }
-    }
-
-    fn chown(&self, uid: u32, gid: u32) -> impl Future<Output = KResult<()>> + Send {
-        async { Err(EPERM) }
-    }
-}
-
-#[async_trait]
-pub trait InodeDir {
-    async fn lookup(&self, dentry: &Arc<Dentry>) -> KResult<Option<InodeUse<dyn Inode>>>;
-    async fn create(&self, at: &Arc<Dentry>, perm: Permission) -> KResult<()>;
-    async fn mkdir(&self, at: &Dentry, perm: Permission) -> KResult<()>;
-    async fn mknod(&self, at: &Dentry, mode: Mode, dev: DeviceId) -> KResult<()>;
-    async fn unlink(&self, at: &Arc<Dentry>) -> KResult<()>;
-    async fn symlink(&self, at: &Arc<Dentry>, target: &[u8]) -> KResult<()>;
-    async fn rename(&self, rename_data: RenameData<'_, '_>) -> KResult<()>;
-
-    fn readdir<'r, 'a: 'r, 'b: 'r>(
-        &'a self,
-        offset: usize,
-        callback: &'b mut (dyn FnMut(&[u8], Ino) -> KResult<bool> + Send),
-    ) -> Pin<Box<dyn Future<Output = KResult<Result<usize, PosixError>>> + Send + 'r>>;
-}
-
-#[async_trait]
-pub trait InodeFile {
-    async fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize>;
-    async fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize>;
-    async fn write(&self, stream: &mut dyn Stream, offset: WriteOffset<'_>) -> KResult<usize>;
-    async fn write_direct(&self, stream: &mut dyn Stream, offset: usize) -> KResult<usize>;
-    fn devid(&self) -> KResult<DeviceId>;
-    async fn readlink(&self, buffer: &mut dyn Buffer) -> KResult<usize>;
-    async fn truncate(&self, length: usize) -> KResult<()>;
-    async fn chmod(&self, mode: Mode) -> KResult<()>;
-    async fn chown(&self, uid: u32, gid: u32) -> KResult<()>;
-}
-
-pub trait Inode: InodeFile + InodeDir + Any + Send + Sync + 'static {
-    fn ino(&self) -> Ino;
-    fn format(&self) -> Format;
-    fn info(&self) -> &Spin<InodeInfo>;
-
-    // TODO: This might should be removed... Temporary workaround for now.
-    fn page_cache(&self) -> Option<&PageCache>;
-
-    fn sbref(&self) -> SbRef;
-    fn sbget(&self) -> KResult<SbUse<dyn SuperBlock>>;
-}
-
-#[async_trait]
-impl<T> InodeFile for T
-where
-    T: InodeFileOps,
-{
-    async fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
-        self.read(buffer, offset).await
-    }
-
-    async fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
-        self.read_direct(buffer, offset).await
-    }
-
-    async fn write(&self, stream: &mut dyn Stream, offset: WriteOffset<'_>) -> KResult<usize> {
-        self.write(stream, offset).await
-    }
-
-    async fn write_direct(&self, stream: &mut dyn Stream, offset: usize) -> KResult<usize> {
-        self.write_direct(stream, offset).await
-    }
-
-    fn devid(&self) -> KResult<DeviceId> {
-        self.devid()
-    }
 
     async fn readlink(&self, buffer: &mut dyn Buffer) -> KResult<usize> {
-        self.readlink(buffer).await
+        Err(EINVAL)
     }
 
     async fn truncate(&self, length: usize) -> KResult<()> {
-        self.truncate(length).await
+        Err(EPERM)
     }
 
-    async fn chmod(&self, mode: Mode) -> KResult<()> {
-        self.chmod(Permission::new(mode.non_format_bits())).await
+    async fn chmod(&self, perm: Permission) -> KResult<()> {
+        Err(EPERM)
     }
 
     async fn chown(&self, uid: u32, gid: u32) -> KResult<()> {
-        self.chown(uid, gid).await
-    }
-}
-
-#[async_trait]
-impl<T> InodeDir for T
-where
-    T: InodeDirOps,
-{
-    async fn lookup(&self, dentry: &Arc<Dentry>) -> KResult<Option<InodeUse<dyn Inode>>> {
-        self.lookup(dentry).await
-    }
-
-    async fn create(&self, at: &Arc<Dentry>, perm: Permission) -> KResult<()> {
-        self.create(at, perm).await
-    }
-
-    async fn mkdir(&self, at: &Dentry, perm: Permission) -> KResult<()> {
-        self.mkdir(at, perm).await
+        Err(EPERM)
     }
 
-    async fn mknod(&self, at: &Dentry, mode: Mode, dev: DeviceId) -> KResult<()> {
-        self.mknod(at, mode, dev).await
-    }
-
-    async fn unlink(&self, at: &Arc<Dentry>) -> KResult<()> {
-        self.unlink(at).await
-    }
-
-    async fn symlink(&self, at: &Arc<Dentry>, target: &[u8]) -> KResult<()> {
-        self.symlink(at, target).await
+    // PAGE CACHE OPERATIONS
+    async fn read_page(&self, page: &mut CachePage, offset: PageOffset) -> KResult<()>
{
+        Err(EINVAL)
     }

-    async fn rename(&self, rename_data: RenameData<'_, '_>) -> KResult<()> {
-        self.rename(rename_data).await
+    async fn write_page(&self, page: &mut CachePage, offset: PageOffset) -> KResult<()> {
+        Err(EINVAL)
     }

-    fn readdir<'r, 'a: 'r, 'b: 'r>(
-        &'a self,
+    async fn write_begin<'a>(
+        &self,
+        page_cache: &PageCache,
+        pages: &'a mut BTreeMap,
         offset: usize,
-        callback: &'b mut (dyn FnMut(&[u8], Ino) -> KResult + Send),
-    ) -> Pin>> + Send + 'r>> {
-        Box::pin(self.readdir(offset, callback))
-    }
-}
-
-impl Inode for T
-where
-    T: InodeOps + InodeFile + InodeDir,
-{
-    fn ino(&self) -> Ino {
-        self.ino()
-    }
-
-    fn format(&self) -> Format {
-        self.format()
-    }
-
-    fn info(&self) -> &Spin {
-        self.info()
+        len: usize,
+    ) -> KResult<&'a mut CachePage> {
+        Err(EINVAL)
     }

-    fn page_cache(&self) -> Option<&PageCache> {
-        self.page_cache()
+    async fn write_end(
+        &self,
+        page_cache: &PageCache,
+        pages: &mut BTreeMap,
+        offset: usize,
+        len: usize,
+        copied: usize
+    ) -> KResult<()> {
+        Err(EINVAL)
     }

-    fn sbref(&self) -> SbRef {
-        self.super_block().clone()
-    }
+
-    fn sbget(&self) -> KResult> {
-        self.super_block().get().map(|sb| sb as _)
+    fn devid(&self) -> KResult {
+        Err(EINVAL)
     }
 }

@@ -321,64 +277,87 @@ pub struct InodeInfo {
     pub mtime: Instant,
 }

-pub struct InodeUse(Arc)
-where
-    I: Inode + ?Sized;
+#[repr(transparent)]
+pub struct InodeUse(Arc);
+
+impl InodeUse {
+    pub fn new(
+        sb: SbRef,
+        ino: Ino,
+        format: Format,
+        info: InodeInfo,
+        ops: impl InodeOps,
+    ) -> Self {
+        let inode = Inode {
+            sb,
+            ino,
+            format,
+            info: Spin::new(info),
+            rwsem: RwLock::new(()),
+            page_cache: Spin::new(Weak::new()),
+            ops: Box::new(ops),
+        };

-impl InodeUse
-where
-    I: Inode,
-{
-    pub fn new(inode: I) -> Self {
         Self(Arc::new(inode))
     }

-    pub fn new_cyclic(inode_func: impl FnOnce(&Weak) -> I) -> Self {
-        Self(Arc::new_cyclic(inode_func))
+    pub fn sbref(&self) -> SbRef {
+        self.sb.clone()
     }
-}

-impl InodeUse
-where
-    I: Inode + ?Sized,
-{
-    pub fn as_raw(&self) -> *const I {
-        Arc::as_ptr(&self.0)
+    pub fn sbget(&self) -> KResult> {
+        self.sb.get().map(|sb| sb as _)
+    }
+
+    pub fn get_priv(&self) -> &I
+    where
+        I: InodeOps,
+    {
+        let ops = (&*self.ops) as &dyn Any;
+
+        ops.downcast_ref()
+            .expect("InodeUse::private: InodeOps type mismatch")
     }
-}

-impl CoerceUnsized> for InodeUse
-where
-    T: Inode + Unsize + ?Sized,
-    U: Inode + ?Sized,
-{
+    pub fn get_page_cache(&self) -> Arc {
+        if let Some(cache) = self.page_cache.lock().upgrade() {
+            return cache;
+        }
+
+        // Slow path...
+ let cache = Arc::new(PageCache::new(self.clone())); + let mut page_cache = self.page_cache.lock(); + if let Some(cache) = page_cache.upgrade() { + return cache; + } + + *page_cache = Arc::downgrade(&cache); + cache + } } -impl Clone for InodeUse -where - I: Inode + ?Sized, -{ +impl Clone for InodeUse { fn clone(&self) -> Self { Self(self.0.clone()) } } -impl core::fmt::Debug for InodeUse -where - I: Inode + ?Sized, -{ +impl core::fmt::Debug for InodeUse { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, "InodeUse(ino={})", self.ino()) + write!(f, "InodeUse(ino={})", self.ino) } } -impl Deref for InodeUse -where - I: Inode + ?Sized, -{ - type Target = I; +impl Deref for InodeUse { + type Target = Inode; fn deref(&self) -> &Self::Target { self.0.deref() } } + +impl PartialEq for InodeUse { + fn eq(&self, other: &Self) -> bool { + Arc::ptr_eq(&self.0, &other.0) + } +} diff --git a/src/kernel/vfs/inode/mod.rs b/src/kernel/vfs/inode/mod.rs index 08471ef3..6f4f041a 100644 --- a/src/kernel/vfs/inode/mod.rs +++ b/src/kernel/vfs/inode/mod.rs @@ -4,5 +4,5 @@ mod ops; mod statx; pub use ino::Ino; -pub use inode::{Inode, InodeDirOps, InodeFileOps, InodeInfo, InodeOps, InodeUse}; +pub use inode::{Inode, InodeInfo, InodeOps, InodeUse}; pub use ops::{RenameData, WriteOffset}; diff --git a/src/kernel/vfs/inode/ops.rs b/src/kernel/vfs/inode/ops.rs index baab1a80..7bf00ce5 100644 --- a/src/kernel/vfs/inode/ops.rs +++ b/src/kernel/vfs/inode/ops.rs @@ -1,9 +1,8 @@ use alloc::sync::Arc; +use super::inode::InodeUse; use crate::kernel::vfs::dentry::Dentry; -use super::{inode::InodeUse, Inode}; - pub enum WriteOffset<'end> { Position(usize), End(&'end mut usize), @@ -12,7 +11,7 @@ pub enum WriteOffset<'end> { pub struct RenameData<'a, 'b> { pub old_dentry: &'a Arc, pub new_dentry: &'b Arc, - pub new_parent: InodeUse, + pub new_parent: InodeUse, pub is_exchange: bool, pub no_replace: bool, } diff --git a/src/kernel/vfs/inode/statx.rs b/src/kernel/vfs/inode/statx.rs index a85ef3af..feb2a1b5 100644 --- a/src/kernel/vfs/inode/statx.rs +++ b/src/kernel/vfs/inode/statx.rs @@ -1,25 +1,17 @@ use posix_types::stat::StatX; -use crate::{ - kernel::{ - constants::{ - STATX_ATIME, STATX_BLOCKS, STATX_CTIME, STATX_GID, STATX_INO, STATX_MODE, STATX_MTIME, - STATX_NLINK, STATX_SIZE, STATX_TYPE, STATX_UID, - }, - vfs::types::Format, - }, - prelude::KResult, +use super::inode::InodeUse; +use crate::kernel::constants::{ + STATX_ATIME, STATX_BLOCKS, STATX_CTIME, STATX_GID, STATX_INO, STATX_MODE, STATX_MTIME, + STATX_NLINK, STATX_SIZE, STATX_TYPE, STATX_UID, }; +use crate::kernel::vfs::types::Format; +use crate::prelude::KResult; -use super::{inode::InodeUse, Inode}; - -impl InodeUse -where - I: Inode + ?Sized, -{ +impl InodeUse { pub fn statx(&self, stat: &mut StatX, mask: u32) -> KResult<()> { let sb = self.sbget()?; - let info = self.info().lock(); + let info = self.info.lock(); if mask & STATX_NLINK != 0 { stat.stx_nlink = info.nlink as _; @@ -53,10 +45,8 @@ where } if mask & STATX_TYPE != 0 { - let format = self.format(); - - stat.stx_mode |= format.as_raw() as u16; - if let Format::BLK | Format::CHR = format { + stat.stx_mode |= self.format.as_raw() as u16; + if let Format::BLK | Format::CHR = self.format { let devid = self.devid()?; stat.stx_rdev_major = devid.major as _; stat.stx_rdev_minor = devid.minor as _; @@ -65,7 +55,7 @@ where } if mask & STATX_INO != 0 { - stat.stx_ino = self.ino().as_raw(); + stat.stx_ino = self.ino.as_raw(); stat.stx_mask |= STATX_INO; } diff --git 
a/src/kernel/vfs/mount.rs b/src/kernel/vfs/mount.rs
index 213acae9..6b171f81 100644
--- a/src/kernel/vfs/mount.rs
+++ b/src/kernel/vfs/mount.rs
@@ -1,17 +1,17 @@
-use super::{
-    dentry::{dcache, Dentry, DROOT},
-    inode::{Inode, InodeUse},
-    SbUse, SuperBlock,
-};
-use crate::kernel::{
-    constants::{EEXIST, ENODEV, ENOTDIR},
-    task::block_on,
-};
-use crate::prelude::*;
-use alloc::{collections::btree_map::BTreeMap, string::ToString as _, sync::Arc};
+use alloc::collections::btree_map::BTreeMap;
+use alloc::string::ToString as _;
+use alloc::sync::Arc;
+
 use async_trait::async_trait;
 use eonix_sync::LazyLock;

+use super::dentry::{dcache, Dentry, DROOT};
+use super::inode::InodeUse;
+use super::{SbUse, SuperBlock};
+use crate::kernel::constants::{EEXIST, ENODEV, ENOTDIR};
+use crate::kernel::task::block_on;
+use crate::prelude::*;
+
 pub const MS_RDONLY: u64 = 1 << 0;
 pub const MS_NOSUID: u64 = 1 << 1;
 pub const MS_NODEV: u64 = 1 << 2;
@@ -39,11 +39,7 @@ pub struct Mount {
 }

 impl Mount {
-    pub fn new(
-        mp: &Dentry,
-        sb: SbUse,
-        root_inode: InodeUse,
-    ) -> KResult {
+    pub fn new(mp: &Dentry, sb: SbUse, root_inode: InodeUse) -> KResult {
         let root_dentry = Dentry::create(mp.parent().clone(), &mp.get_name());
         root_dentry.fill(root_inode);

diff --git a/src/kernel/vfs/superblock.rs b/src/kernel/vfs/superblock.rs
index 85b28c01..e3be5cef 100644
--- a/src/kernel/vfs/superblock.rs
+++ b/src/kernel/vfs/superblock.rs
@@ -1,16 +1,15 @@
-use core::{
-    marker::Unsize,
-    ops::{CoerceUnsized, Deref},
-};
-
 use alloc::sync::{Arc, Weak};
-use eonix_sync::RwLock;
+use core::any::{Any, TypeId};
+use core::marker::Unsize;
+use core::ops::{CoerceUnsized, Deref};

-use crate::{kernel::constants::EIO, prelude::KResult};
+use eonix_sync::RwLock;

 use super::types::DeviceId;
+use crate::kernel::constants::EIO;
+use crate::prelude::KResult;

-pub trait SuperBlock: Send + Sync + 'static {}
+pub trait SuperBlock: Any + Send + Sync + 'static {}

 #[derive(Debug, Clone)]
 pub struct SuperBlockInfo {
@@ -83,6 +82,36 @@ where
     }
 }

+impl SbUse
+where
+    S: SuperBlock + ?Sized,
+{
+    pub fn get_ref(&self) -> SbRef {
+        SbRef(Arc::downgrade(&self.0))
+    }
+}
+
+impl SbUse {
+    /// Downcast the superblock to a specific type.
+    ///
+    /// # Panics
+    /// Panics if the downcast fails.
+    pub fn downcast(self) -> SbUse {
+        let Self(sb_complex) = self;
+        if (&sb_complex.backend as &dyn Any).type_id() != TypeId::of::() {
+            panic!("Downcast failed: type mismatch");
+        }
+
+        unsafe {
+            // SAFETY: We have checked the type above and unsized coercion says
+            // that Arc has the same layout as Arc if T: Unsize.
+            SbUse(Arc::from_raw(
+                Arc::into_raw(sb_complex) as *const SuperBlockComplex
+            ))
+        }
+    }
+}
+
 impl Clone for SbRef
 where
     S: SuperBlock + ?Sized,
diff --git a/src/lib.rs b/src/lib.rs
index 959cb29f..8457169c 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -3,7 +3,6 @@
 #![feature(allocator_api)]
 #![feature(c_size_t)]
 #![feature(coerce_unsized)]
-#![feature(concat_idents)]
 #![feature(arbitrary_self_types)]
 #![feature(get_mut_unchecked)]
 #![feature(macro_metavar_expr)]

From 913f71f81970bf4dd7f30e60f2f0166eaf13785a Mon Sep 17 00:00:00 2001
From: greatbridf
Date: Sun, 18 Jan 2026 00:56:10 +0800
Subject: [PATCH 10/25] mem: introduce new `Folio` abstraction

- Remove struct `Page` and add `Folio`s to represent runs of adjacent pages.
- Introduce `Zone`s similar to those in Linux. Looking forward to removing
  all occurrences of `ArchPhysAccess` and so on.
- Adapt existing code to the new `Folio` interface in a rough and dirty way.
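
For reference before the diff: a folio of order `n` covers `2^n` physically contiguous pages, so its byte length is `1 << (order + PAGE_SIZE_BITS)` and its start address is the PFN shifted left by `PAGE_SIZE_BITS`. A runnable sketch of that arithmetic; the constants match those in `eonix_mm`, and the struct is a simplified stand-in for the `BasicFolio` introduced below:

```rust
// Sketch of the folio size arithmetic used throughout this patch.
// The struct is a simplified stand-in for `BasicFolio`.
const PAGE_SIZE: usize = 4096;
const PAGE_SIZE_BITS: u32 = PAGE_SIZE.trailing_zeros();

struct Folio {
    pfn: usize, // physical frame number
    order: u32, // log2 of the number of contained pages
}

impl Folio {
    fn len(&self) -> usize {
        1 << (self.order + PAGE_SIZE_BITS)
    }

    fn start_paddr(&self) -> usize {
        self.pfn << PAGE_SIZE_BITS
    }
}

fn main() {
    let folio = Folio { pfn: 0x80000, order: 2 };
    assert_eq!(folio.len(), 4 * PAGE_SIZE); // order 2 => 4 pages
    assert_eq!(folio.start_paddr(), 0x8000_0000);
}
```
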
Signed-off-by: greatbridf --- .vscode/tasks.json | 2 +- crates/buddy_allocator/src/lib.rs | 112 +++---- .../eonix_hal/src/arch/riscv64/bootstrap.rs | 62 ++-- crates/eonix_hal/src/arch/riscv64/mm.rs | 51 +-- crates/eonix_hal/src/mm.rs | 38 ++- crates/eonix_mm/src/address/paddr.rs | 10 +- crates/eonix_mm/src/page_table.rs | 2 +- crates/eonix_mm/src/page_table/page_table.rs | 149 ++++----- crates/eonix_mm/src/page_table/pte.rs | 3 +- .../eonix_mm/src/page_table/pte_iterator.rs | 138 ++++---- crates/eonix_mm/src/page_table/walk.rs | 210 ++++++++++++ crates/eonix_mm/src/paging.rs | 8 +- crates/eonix_mm/src/paging/list.rs | 14 +- crates/eonix_mm/src/paging/page.rs | 313 +++--------------- crates/eonix_mm/src/paging/page_alloc.rs | 79 +---- crates/eonix_mm/src/paging/raw_page.rs | 47 --- crates/eonix_mm/src/paging/zone.rs | 7 +- crates/slab_allocator/src/lib.rs | 44 ++- src/driver/ahci/command.rs | 28 +- src/driver/ahci/command_table.rs | 7 +- src/driver/ahci/defs.rs | 6 +- src/driver/ahci/slot.rs | 10 +- src/driver/e1000e.rs | 19 +- src/driver/virtio/virtio_blk.rs | 8 +- src/fs/fat32.rs | 12 +- src/fs/fat32/file.rs | 24 -- src/fs/tmpfs/file.rs | 2 +- src/kernel/block.rs | 17 +- src/kernel/mem.rs | 4 +- src/kernel/mem/allocator.rs | 26 +- src/kernel/mem/folio.rs | 210 ++++++++++++ src/kernel/mem/mm_area.rs | 39 ++- src/kernel/mem/mm_list.rs | 64 ++-- src/kernel/mem/mm_list/page_table.rs | 40 +++ src/kernel/mem/page_alloc.rs | 91 ++--- src/kernel/mem/page_alloc/raw_page.rs | 136 ++------ src/kernel/mem/page_alloc/zones.rs | 22 +- src/kernel/mem/page_cache.rs | 97 +++--- src/kernel/mem/paging.rs | 122 +------ src/kernel/task/kernel_stack.rs | 25 +- src/kernel/vfs/file/mod.rs | 4 +- src/kernel_init.rs | 58 ++-- 42 files changed, 1116 insertions(+), 1244 deletions(-) create mode 100644 crates/eonix_mm/src/page_table/walk.rs delete mode 100644 crates/eonix_mm/src/paging/raw_page.rs delete mode 100644 src/fs/fat32/file.rs create mode 100644 src/kernel/mem/folio.rs create mode 100644 src/kernel/mem/mm_list/page_table.rs diff --git a/.vscode/tasks.json b/.vscode/tasks.json index a85ea0cf..e7a54791 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -6,7 +6,7 @@ { "label": "debug run riscv64", "type": "shell", - "command": "make srun ARCH=riscv64 IMG=/Volumes/oscomp/sdcard-rv.img", + "command": "make srun ARCH=riscv64", "isBackground": true, "problemMatcher": [ { diff --git a/crates/buddy_allocator/src/lib.rs b/crates/buddy_allocator/src/lib.rs index abe1ef7b..82a7d6c5 100644 --- a/crates/buddy_allocator/src/lib.rs +++ b/crates/buddy_allocator/src/lib.rs @@ -3,12 +3,12 @@ use core::hint::unreachable_unchecked; use eonix_mm::address::{AddrOps as _, PAddr, PRange}; -use eonix_mm::paging::{PageList, PageListSized, Zone, PFN}; +use eonix_mm::paging::{FolioList, FolioListSized, Zone, PFN}; const MAX_ORDER: u32 = 10; const AREAS: usize = const { MAX_ORDER as usize + 1 }; -pub trait BuddyPage: Sized + 'static { +pub trait BuddyFolio: Sized + 'static { fn pfn(&self) -> PFN; fn get_order(&self) -> u32; @@ -20,19 +20,19 @@ pub trait BuddyPage: Sized + 'static { struct FreeArea where - L: PageList, + L: FolioList, { free_list: L, count: usize, } -unsafe impl Send for FreeArea where L: PageList {} -unsafe impl Sync for FreeArea where L: PageList {} +unsafe impl Send for FreeArea where L: FolioList {} +unsafe impl Sync for FreeArea where L: FolioList {} pub struct BuddyAllocator where Z: Zone + 'static, - L: PageList, + L: FolioList, { zone: &'static Z, free_areas: [FreeArea; AREAS], @@ -41,8 +41,8 @@ where 
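
`BuddyFolio` is the whole contract the buddy allocator needs from a folio: a PFN, a mutable order, and a flag saying whether the folio currently sits in a free list. A toy implementor, with `PFN` simplified to a plain `usize`:

```rust
// Toy implementor of the `BuddyFolio` contract; a stand-in for the
// kernel's real folio struct, with `PFN` simplified to usize.
struct ToyFolio {
    pfn: usize,
    order: u32,
    in_free_list: bool,
}

impl ToyFolio {
    fn pfn(&self) -> usize { self.pfn }
    fn get_order(&self) -> u32 { self.order }
    fn set_order(&mut self, order: u32) { self.order = order; }
    fn is_buddy(&self) -> bool { self.in_free_list }
    fn set_buddy(&mut self, buddy: bool) { self.in_free_list = buddy; }
}

fn main() {
    // What `FreeArea::add_folio` does to a folio entering order 3.
    let mut folio = ToyFolio { pfn: 0x100, order: 0, in_free_list: false };
    folio.set_order(3);
    folio.set_buddy(true);
    assert!(folio.is_buddy() && folio.get_order() == 3);
    assert_eq!(folio.pfn(), 0x100);
}
```
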
impl BuddyAllocator where Z: Zone + 'static, - Z::Page: BuddyPage, - L: PageListSized, + Z::Page: BuddyFolio, + L: FolioListSized, { pub const fn new(zone: &'static Z) -> Self { Self { @@ -52,13 +52,13 @@ where } } -impl BuddyAllocator +impl BuddyAllocator where - Z: Zone, - L: PageList, - P: BuddyPage + 'static, + Z: Zone, + L: FolioList, + F: BuddyFolio + 'static, { - pub fn create_pages(&mut self, start: PAddr, end: PAddr) { + pub fn create_folios(&mut self, start: PAddr, end: PAddr) { assert!( self.zone .contains_prange(PRange::new(start.ceil(), end.floor())), @@ -82,40 +82,40 @@ where unsafe { // SAFETY: We've checked that the range is within the zone above. - self.add_page_unchecked(pfn, order) + self.add_folio_unchecked(pfn, order) }; pfn = new_end_pfn; } } - fn add_page(&mut self, pfn: PFN, order: u32) { + fn add_folio(&mut self, pfn: PFN, order: u32) { let prange = PRange::from(PAddr::from(pfn)).grow(1 << (order + 12)); assert!( self.zone.contains_prange(prange), - "The given page is not within the zone." + "The given folio is not within the zone." ); unsafe { // SAFETY: Checks above. - self.add_page_unchecked(pfn, order); + self.add_folio_unchecked(pfn, order); } } - unsafe fn add_page_unchecked(&mut self, pfn: PFN, order: u32) { - let Some(page) = self.zone.get_page(pfn) else { + unsafe fn add_folio_unchecked(&mut self, pfn: PFN, order: u32) { + let Some(mut folio) = self.zone.get_page(pfn) else { unsafe { unreachable_unchecked() } }; unsafe { // SAFETY: The caller ensures that the page is unused. - let page_mut = &mut *page.get(); - self.free_areas[order as usize].add_page(page_mut, order); + let folio_mut = folio.as_mut(); + self.free_areas[order as usize].add_folio(folio_mut, order); } } - fn break_page(&mut self, page: &mut P, order: u32, target_order: u32) { - let pfn = page.pfn(); + fn break_folio(&mut self, folio: &mut F, order: u32, target_order: u32) { + let pfn = folio.pfn(); for order in (target_order..order).rev() { let buddy_pfn = pfn + (1 << order); @@ -123,50 +123,50 @@ where unsafe { // SAFETY: We got the page from `self.free_areas`. Checks are // done when we've put the page into the buddy system. 
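
`break_folio` above peels buddies off the high end one order at a time: each step returns the upper half at `pfn + (1 << order)` to the free lists. The split positions, extracted into a checkable sketch with PFNs as plain usizes:

```rust
// Sketch of the split step in `break_folio`: lowering an order-`o`
// folio to `target_order` frees the upper halves one order at a time.
fn split_steps(pfn: usize, order: u32, target_order: u32) -> Vec<(usize, u32)> {
    // Each freed buddy starts at `pfn + (1 << o)` pages, exactly as in
    // the loop in `break_folio`.
    (target_order..order)
        .rev()
        .map(|o| (pfn + (1usize << o), o))
        .collect()
}

fn main() {
    // Splitting an order-3 folio at PFN 0 down to order 0 frees the
    // order-2 buddy at PFN 4, the order-1 buddy at PFN 2, then the
    // order-0 buddy at PFN 1; PFN 0 itself stays allocated at order 0.
    assert_eq!(split_steps(0, 3, 0), vec![(4, 2), (2, 1), (1, 0)]);
}
```
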
- self.add_page_unchecked(buddy_pfn, order); + self.add_folio_unchecked(buddy_pfn, order); } } - page.set_order(target_order); + folio.set_order(target_order); } pub fn alloc_order(&mut self, order: u32) -> Option<&'static mut Z::Page> { for current_order in order..AREAS as u32 { - let Some(page) = self.free_areas[current_order as usize].get_free_page() else { + let Some(folio) = self.free_areas[current_order as usize].get_free_folio() else { continue; }; if current_order > order { - self.break_page(page, current_order, order); + self.break_folio(folio, current_order, order); } - return Some(page); + return Some(folio); } None } - pub unsafe fn dealloc(&mut self, page: &'static mut Z::Page) { - let mut pfn = page.pfn(); - let mut order = page.get_order(); + pub unsafe fn dealloc(&mut self, folio: &'static mut Z::Page) { + let mut pfn = folio.pfn(); + let mut order = folio.get_order(); assert!( - !page.is_buddy(), - "Trying to free a page that is already in the buddy system: {pfn:?}", + !folio.is_buddy(), + "Trying to free a folio that is already in the buddy system: {pfn:?}", ); while order < MAX_ORDER { let buddy_pfn = pfn.buddy_pfn(order); - let Some(buddy_page) = self.try_get_buddy(buddy_pfn, order) else { + let Some(buddy) = self.try_get_buddy(buddy_pfn, order) else { break; }; - self.free_areas[order as usize].remove_page(buddy_page); + self.free_areas[order as usize].remove_folio(buddy); pfn = pfn.combined_pfn(buddy_pfn); order += 1; } - self.add_page(pfn, order); + self.add_folio(pfn, order); } /// This function checks whether the given page is within our [`Zone`] and @@ -176,32 +176,32 @@ where /// - the buddy is within the same [`Zone`] as us. /// - the buddy is a free buddy (in some [`FreeArea`]) /// - the buddy has order [`order`] - fn try_get_buddy<'a>(&mut self, buddy_pfn: PFN, order: u32) -> Option<&'a mut P> { - let buddy_page = self.zone.get_page(buddy_pfn)?; + fn try_get_buddy<'a>(&mut self, buddy_pfn: PFN, order: u32) -> Option<&'a mut F> { + let mut buddy = self.zone.get_page(buddy_pfn)?; unsafe { // SAFETY: We just test whether the page is a buddy. - let buddy_page_ref = &*buddy_page.get(); + let buddy_ref = buddy.as_ref(); - if !buddy_page_ref.is_buddy() { + if !buddy_ref.is_buddy() { return None; } // Sad... - if buddy_page_ref.get_order() != order { + if buddy_ref.get_order() != order { return None; } // SAFETY: We have the mutable reference to the buddy allocator. // So all the pages within are exclusively accessible to us. 
- Some(&mut *buddy_page.get()) + Some(buddy.as_mut()) } } } impl FreeArea where - L: PageListSized, + L: FolioListSized, { const fn new() -> Self { Self { @@ -213,34 +213,34 @@ where impl FreeArea where - L: PageList, - L::Page: BuddyPage + 'static, + L: FolioList, + L::Folio: BuddyFolio + 'static, { - pub fn get_free_page(&mut self) -> Option<&'static mut L::Page> { - self.free_list.pop_head().map(|page| { + pub fn get_free_folio(&mut self) -> Option<&'static mut L::Folio> { + self.free_list.pop_head().map(|folio| { assert_ne!(self.count, 0, "Oops"); - page.set_buddy(false); + folio.set_buddy(false); self.count -= 1; - page + folio }) } - pub fn add_page(&mut self, page: &'static mut L::Page, order: u32) { - page.set_order(order); - page.set_buddy(true); + pub fn add_folio(&mut self, folio: &'static mut L::Folio, order: u32) { + folio.set_order(order); + folio.set_buddy(true); self.count += 1; - self.free_list.push_tail(page); + self.free_list.push_tail(folio); } - pub fn remove_page(&mut self, page: &mut L::Page) { + pub fn remove_folio(&mut self, folio: &mut L::Folio) { assert_ne!(self.count, 0, "Oops"); - page.set_buddy(false); + folio.set_buddy(false); self.count -= 1; - self.free_list.remove(page); + self.free_list.remove(folio); } } diff --git a/crates/eonix_hal/src/arch/riscv64/bootstrap.rs b/crates/eonix_hal/src/arch/riscv64/bootstrap.rs index 7b3dc043..b2305f99 100644 --- a/crates/eonix_hal/src/arch/riscv64/bootstrap.rs +++ b/crates/eonix_hal/src/arch/riscv64/bootstrap.rs @@ -2,12 +2,13 @@ use core::alloc::Allocator; use core::arch::{asm, global_asm, naked_asm}; use core::cell::RefCell; use core::hint::spin_loop; +use core::ptr::NonNull; use core::sync::atomic::{AtomicBool, AtomicPtr, AtomicUsize, Ordering}; use eonix_hal_traits::mm::Memory; use eonix_mm::address::{Addr as _, PAddr, PRange, PhysAccess, VAddr, VRange}; -use eonix_mm::page_table::{PageAttribute, PagingMode, PTE as _}; -use eonix_mm::paging::{Page, PageAccess, PageAlloc, PAGE_SIZE, PFN}; +use eonix_mm::page_table::{PageAttribute, PageTable, PagingMode, TableAttribute, PTE as _}; +use eonix_mm::paging::{Folio, FrameAlloc, PageAccess, PageBlock, PAGE_SIZE, PFN}; use eonix_percpu::PercpuArea; use fdt::Fdt; use riscv::asm::sfence_vma_all; @@ -23,9 +24,12 @@ use super::cpu::{CPUID, CPU_COUNT}; use super::time::set_next_timer; use crate::arch::cpu::CPU; use crate::arch::fdt::{init_dtb_and_fdt, FdtExt, FDT}; -use crate::arch::mm::{ArchPhysAccess, FreeRam, PageAttribute64, GLOBAL_PAGE_TABLE}; +use crate::arch::mm::{ + ArchPagingMode, ArchPhysAccess, FreeRam, PageAccessImpl, PageAttribute64, RawPageTableSv48, + GLOBAL_PAGE_TABLE, +}; use crate::bootstrap::BootStrapData; -use crate::mm::{ArchMemory, ArchPagingMode, BasicPageAlloc, BasicPageAllocRef, ScopedAllocator}; +use crate::mm::{ArchMemory, BasicPageAlloc, BasicPageAllocRef, ScopedAllocator}; #[unsafe(link_section = ".bootstrap.stack")] static BOOT_STACK: [u8; 4096 * 16] = [0; 4096 * 16]; @@ -38,26 +42,26 @@ static TEMP_AP_STACK: [u8; 256] = [0; 256]; static TEMP_AP_STACK_START: &'static [u8; 256] = &TEMP_AP_STACK; #[repr(C, align(4096))] -struct PageTable([u64; PTES_PER_PAGE]); +struct BootPageTable([u64; PTES_PER_PAGE]); /// map 0x8000 0000 to itself and 0xffff ffff 8000 0000 #[unsafe(link_section = ".bootstrap.page_table.1")] -static BOOT_PAGE_TABLE: PageTable = { +static BOOT_PAGE_TABLE: BootPageTable = { let mut arr: [u64; PTES_PER_PAGE] = [0; PTES_PER_PAGE]; arr[0] = 0 | 0x2f; arr[510] = 0 | 0x2f; arr[511] = (0x80202 << 10) | 0x21; - PageTable(arr) + 
BootPageTable(arr)
 };

 #[unsafe(link_section = ".bootstrap.page_table.2")]
 #[used]
-static PT1: PageTable = {
+static PT1: BootPageTable = {
     let mut arr: [u64; PTES_PER_PAGE] = [0; PTES_PER_PAGE];

     arr[510] = (0x80000 << 10) | 0x2f;

-    PageTable(arr)
+    BootPageTable(arr)
 };

 static BSP_PAGE_ALLOC: AtomicPtr> = AtomicPtr::new(core::ptr::null_mut());
@@ -111,7 +115,7 @@ pub unsafe extern "C" fn riscv64_start(hart_id: usize, dtb_addr: PAddr) -> ! {
         real_allocator.borrow_mut().add_range(range);
     }

-    setup_kernel_page_table(&alloc);
+    setup_kernel_page_table(alloc.clone());

     unsafe {
         init_dtb_and_fdt(dtb_addr);
     }
@@ -148,8 +152,12 @@ unsafe extern "C" {

 /// TODO:
 /// Add finer-grained control over the kernel image mappings, or leave them as they are.
-fn setup_kernel_page_table(alloc: impl PageAlloc) {
-    let global_page_table = &GLOBAL_PAGE_TABLE;
+fn setup_kernel_page_table(alloc: BasicPageAllocRef) {
+    let global_page_table = PageTable::::new(
+        GLOBAL_PAGE_TABLE.clone(),
+        alloc.clone(),
+        PageAccessImpl,
+    );

     let attr = PageAttribute::WRITE
         | PageAttribute::READ
@@ -160,18 +168,11 @@ fn setup_kernel_page_table(alloc: impl PageAlloc) {
     const KERNEL_BSS_START: VAddr = VAddr::from(0xffffffff40000000);

     // Map kernel BSS
-    for pte in global_page_table.iter_kernel_in(
-        VRange::from(KERNEL_BSS_START).grow(BSS_LENGTH as usize),
-        ArchPagingMode::LEVELS,
-        &alloc,
-    ) {
-        let page = Page::alloc_in(&alloc);
-
-        let attr = {
-            let mut attr = attr.clone();
-            attr.remove(PageAttribute::EXECUTE);
-            attr
-        };
+    let bss_range = VRange::from(KERNEL_BSS_START).grow(BSS_LENGTH as usize);
+    for pte in global_page_table.iter_kernel(bss_range) {
+        let page = alloc.alloc().unwrap();
+        let attr = attr.difference(PageAttribute::EXECUTE);
+
         pte.set(page.into_raw(), attr.into());
     }

@@ -189,17 +190,22 @@ fn setup_kernel_page_table(alloc: impl PageAlloc) {
         );
     }
     sfence_vma_all();
+
+    core::mem::forget(global_page_table);
 }

 /// Set the `tp` register to point at the percpu area.
-fn setup_cpu(alloc: impl PageAlloc, hart_id: usize) {
+fn setup_cpu(alloc: impl FrameAlloc, hart_id: usize) {
     CPU_COUNT.fetch_add(1, Ordering::Relaxed);

     let mut percpu_area = PercpuArea::new(|layout| {
         let page_count = layout.size().div_ceil(PAGE_SIZE);
-        let page = Page::alloc_at_least_in(page_count, alloc);
+        let page = alloc.alloc_at_least(page_count).unwrap();

-        let ptr = ArchPhysAccess::get_ptr_for_page(&page).cast();
+        let ptr = unsafe {
+            // TODO: safety
+            ArchPhysAccess::as_ptr(page.start())
+        };

         page.into_raw();
         ptr
@@ -243,7 +249,7 @@ fn bootstrap_smp(alloc: impl Allocator, page_alloc: &RefCell) {
     for hart_id in FDT.harts().filter(|&id| id != local_hart_id) {
         let stack_range = {
             let page_alloc = BasicPageAllocRef::new(&page_alloc);
-            let ap_stack = Page::alloc_order_in(4, page_alloc);
+            let ap_stack = page_alloc.alloc_order(4).unwrap();
             let stack_range = ap_stack.range();
             ap_stack.into_raw();
             stack_range
diff --git a/crates/eonix_hal/src/arch/riscv64/mm.rs b/crates/eonix_hal/src/arch/riscv64/mm.rs
index 46dd9437..f67646cf 100644
--- a/crates/eonix_hal/src/arch/riscv64/mm.rs
+++ b/crates/eonix_hal/src/arch/riscv64/mm.rs
@@ -1,31 +1,25 @@
-use super::{
-    config::mm::{PHYS_MAP_VIRT, ROOT_PAGE_TABLE_PFN},
-    fdt::{FdtExt, FDT},
-};
-use crate::{arch::riscv64::config::mm::KIMAGE_OFFSET, traits::mm::Memory};
-use core::{marker::PhantomData, ptr::NonNull};
-use eonix_mm::{
-    address::{Addr as _, AddrOps, PAddr, PRange, PhysAccess, VAddr},
-    page_table::{
-        PageAttribute, PageTable, PageTableLevel, PagingMode, RawAttribute, RawPageTable,
-        TableAttribute, PTE,
-    },
-    paging::{NoAlloc, Page, PageBlock, PFN},
+use
core::marker::PhantomData; +use core::ptr::NonNull; + +use eonix_hal_traits::mm::Memory; +use eonix_mm::address::{Addr as _, AddrOps, PAddr, PRange, PhysAccess, VAddr}; +use eonix_mm::page_table::{ + PageAttribute, PageTable, PageTableLevel, PagingMode, RawAttribute, RawPageTable, + TableAttribute, PTE, }; +use eonix_mm::paging::{BasicFolio, Folio, PageAccess, PageBlock, PFN}; use eonix_sync_base::LazyLock; use fdt::Fdt; -use riscv::{ - asm::{sfence_vma, sfence_vma_all}, - register::satp, -}; +use riscv::asm::{sfence_vma, sfence_vma_all}; +use riscv::register::satp; -pub const PAGE_TABLE_BASE: PFN = PFN::from_val(ROOT_PAGE_TABLE_PFN); -pub static GLOBAL_PAGE_TABLE: LazyLock> = - LazyLock::new(|| unsafe { - Page::with_raw(PAGE_TABLE_BASE, |root_table_page| { - PageTable::with_root_table(root_table_page.clone()) - }) - }); +use super::config::mm::{PHYS_MAP_VIRT, ROOT_PAGE_TABLE_PFN}; +use super::fdt::{FdtExt, FDT}; +use crate::arch::riscv64::config::mm::KIMAGE_OFFSET; +use crate::mm::BasicPageAlloc; + +const PAGE_TABLE_BASE: PFN = PFN::from_val(ROOT_PAGE_TABLE_PFN); +pub const GLOBAL_PAGE_TABLE: BasicFolio = BasicFolio::new(PAGE_TABLE_BASE, 0); pub const PA_V: u64 = 0b1 << 0; pub const PA_R: u64 = 0b1 << 1; @@ -61,6 +55,9 @@ pub struct ArchPhysAccess; pub struct ArchMemory; +#[derive(Clone)] +pub struct PageAccessImpl; + impl PTE for PTE64 { type Attr = PageAttribute64; @@ -261,6 +258,12 @@ impl PhysAccess for ArchPhysAccess { } } +impl PageAccess for PageAccessImpl { + unsafe fn get_ptr_for_pfn(&self, pfn: PFN) -> NonNull { + unsafe { ArchPhysAccess::as_ptr(PAddr::from(pfn)) } + } +} + impl Memory for ArchMemory { fn present_ram() -> impl Iterator { FDT.present_ram() diff --git a/crates/eonix_hal/src/mm.rs b/crates/eonix_hal/src/mm.rs index 0a5597ac..c4b9bb74 100644 --- a/crates/eonix_hal/src/mm.rs +++ b/crates/eonix_hal/src/mm.rs @@ -1,16 +1,14 @@ -use core::{ - alloc::{AllocError, Allocator, Layout}, - cell::RefCell, - ptr::NonNull, -}; -use eonix_mm::{ - address::{AddrOps as _, PRange}, - paging::{PageAlloc, UnmanagedRawPage, PAGE_SIZE, PFN}, -}; +use core::alloc::{AllocError, Allocator, Layout}; +use core::cell::RefCell; +use core::ptr::NonNull; + +use eonix_mm::address::{AddrOps as _, PRange}; +use eonix_mm::page_table::PageTableAlloc; +use eonix_mm::paging::{BasicFolio, FrameAlloc, PAGE_SIZE, PFN}; pub use crate::arch::mm::{ flush_tlb, flush_tlb_all, get_root_page_table_pfn, set_root_page_table_pfn, ArchMemory, - ArchPagingMode, ArchPhysAccess, GLOBAL_PAGE_TABLE, + ArchPhysAccess, GLOBAL_PAGE_TABLE, }; pub struct BasicPageAlloc { @@ -118,19 +116,23 @@ impl<'a> BasicPageAllocRef<'a> { } } -impl PageAlloc for BasicPageAllocRef<'_> { - type RawPage = UnmanagedRawPage; +impl FrameAlloc for BasicPageAllocRef<'_> { + type Folio = BasicFolio; - fn alloc_order(&self, order: u32) -> Option { - Some(Self::RawPage::new(self.0.borrow_mut().alloc(order), order)) + fn alloc_order(&self, order: u32) -> Option { + Some(BasicFolio::new(self.0.borrow_mut().alloc(order), order)) } +} + +impl PageTableAlloc for BasicPageAllocRef<'_> { + type Folio = BasicFolio; - unsafe fn dealloc(&self, _: Self::RawPage) { - panic!("Dealloc is not supported in BasicPageAlloc"); + fn alloc(&self) -> Self::Folio { + FrameAlloc::alloc(self).unwrap() } - fn has_management_over(&self, _: Self::RawPage) -> bool { - true + unsafe fn from_raw(&self, pfn: PFN) -> Self::Folio { + BasicFolio::new(pfn, 0) } } diff --git a/crates/eonix_mm/src/address/paddr.rs b/crates/eonix_mm/src/address/paddr.rs index 6fadbd2a..bbfa299e 100644 --- 
a/crates/eonix_mm/src/address/paddr.rs +++ b/crates/eonix_mm/src/address/paddr.rs @@ -1,11 +1,11 @@ +use core::fmt; +use core::ops::{Add, Sub}; +use core::ptr::NonNull; + use super::addr::Addr; use crate::paging::{PAGE_SIZE_BITS, PFN}; -use core::{ - fmt, - ops::{Add, Sub}, - ptr::NonNull, -}; +/// Convert PAddr to VAddr. pub trait PhysAccess { /// Translate the data that this address is pointing to into kernel /// accessible pointer. Use it with care. diff --git a/crates/eonix_mm/src/page_table.rs b/crates/eonix_mm/src/page_table.rs index 55732f72..f3528060 100644 --- a/crates/eonix_mm/src/page_table.rs +++ b/crates/eonix_mm/src/page_table.rs @@ -3,7 +3,7 @@ mod paging_mode; mod pte; mod pte_iterator; -pub use page_table::{PageTable, RawPageTable}; +pub use page_table::{PageTable, PageTableAlloc, RawPageTable}; pub use paging_mode::{PageTableLevel, PagingMode}; pub use pte::{PageAttribute, RawAttribute, TableAttribute, PTE}; pub use pte_iterator::PageTableIterator; diff --git a/crates/eonix_mm/src/page_table/page_table.rs b/crates/eonix_mm/src/page_table/page_table.rs index 8318049f..80be63b9 100644 --- a/crates/eonix_mm/src/page_table/page_table.rs +++ b/crates/eonix_mm/src/page_table/page_table.rs @@ -1,15 +1,12 @@ -use super::{ - paging_mode::PageTableLevel, - pte::{RawAttribute, TableAttribute}, - pte_iterator::{KernelIterator, UserIterator}, - PagingMode, PTE, -}; -use crate::{ - address::{PAddr, VRange}, - page_table::PageTableIterator, - paging::{GlobalPageAlloc, Page, PageAccess, PageAlloc, PageBlock}, -}; -use core::{marker::PhantomData, ptr::NonNull}; +use core::marker::PhantomData; +use core::ptr::NonNull; + +use super::paging_mode::PageTableLevel; +use super::pte::{RawAttribute, TableAttribute}; +use super::{PagingMode, PTE}; +use crate::address::{PAddr, VRange}; +use crate::page_table::PageTableIterator; +use crate::paging::{Folio, PageAccess, PageBlock, PFN}; pub trait RawPageTable<'a>: Send + 'a { type Entry: PTE + 'a; @@ -24,45 +21,60 @@ pub trait RawPageTable<'a>: Send + 'a { unsafe fn from_ptr(ptr: NonNull) -> Self; } +pub trait PageTableAlloc: Clone { + type Folio: Folio; + + fn alloc(&self) -> Self::Folio; + unsafe fn from_raw(&self, pfn: PFN) -> Self::Folio; +} + +pub trait GlobalPageTableAlloc: PageTableAlloc { + const GLOBAL: Self; +} + pub struct PageTable<'a, M, A, X> where M: PagingMode, M::Entry: 'a, - A: PageAlloc, + A: PageTableAlloc, X: PageAccess, { - root_table_page: Page, - phantom: PhantomData<&'a (M, X)>, + root_table_page: A::Folio, + alloc: A, + access: X, + phantom: PhantomData<&'a M>, } impl<'a, M, A, X> PageTable<'a, M, A, X> where M: PagingMode, M::Entry: 'a, - A: PageAlloc, + A: PageTableAlloc, X: PageAccess, { - pub fn with_root_table(root_table_page: Page) -> Self { + pub fn new(root_table_page: A::Folio, alloc: A, access: X) -> Self { Self { root_table_page, + alloc, + access, phantom: PhantomData, } } pub fn clone_global<'b, B>(&self) -> PageTable<'b, M, B, X> where - B: GlobalPageAlloc, + B: GlobalPageTableAlloc, { - self.clone_in(B::global()) + self.clone_in(B::GLOBAL) } pub fn clone_in<'b, B>(&self, alloc: B) -> PageTable<'b, M, B, X> where - B: PageAlloc, + B: PageTableAlloc, { - let new_root_table_page = Page::alloc_in(alloc); - let new_table_data = X::get_ptr_for_page(&new_root_table_page); - let kernel_table_data = X::get_ptr_for_page(&self.root_table_page); + let new_root_table_page = alloc.alloc(); + let new_table_data = self.access.get_ptr_for_page(&new_root_table_page); + let kernel_table_data = 
self.access.get_ptr_for_page(&self.root_table_page); unsafe { // SAFETY: `new_table_data` and `kernel_table_data` are both valid pointers @@ -82,7 +94,7 @@ where root_page_table.index_mut(idx).take(); } - PageTable::with_root_table(new_root_table_page) + PageTable::new(new_root_table_page, alloc, self.access.clone()) } pub fn addr(&self) -> PAddr { @@ -90,100 +102,59 @@ where } pub fn iter_user(&self, range: VRange) -> impl Iterator { - let alloc = self.root_table_page.allocator(); - let page_table_ptr = X::get_ptr_for_page(&self.root_table_page); + let page_table_ptr = self.access.get_ptr_for_page(&self.root_table_page); let root_page_table = unsafe { // SAFETY: `page_table_ptr` is a valid pointer to a page table. M::RawTable::from_ptr(page_table_ptr) }; - PageTableIterator::::new(root_page_table, range, alloc.clone()) - } - - pub fn iter_kernel(&self, range: VRange) -> impl Iterator { - self.iter_kernel_levels(range, M::LEVELS) + PageTableIterator::::new( + root_page_table, + range, + TableAttribute::USER, + self.alloc.clone(), + self.access.clone(), + ) } - /// Iterates over the kernel space entries in the page table for the specified levels. - /// - /// # Parameters - /// - `range`: The virtual address range to iterate over. - /// - `levels`: A slice of `PageTableLevel` that specifies which levels of the page table - /// should be included in the iteration. Each level corresponds to a level in the page - /// table hierarchy, and the iterator will traverse entries at these levels. + /// Iterates over the kernel space entries in the page table. /// /// # Returns /// An iterator over mutable references to the page table entries (`M::Entry`) within the - /// specified range and levels. + /// specified range. /// /// # Example /// ``` /// let range = VRange::new(0x1234000, 0x1300000); - /// let levels = &M::LEVELS[..2]; - /// for pte in page_table.iter_kernel_levels(range, levels) { + /// for pte in page_table.iter_kernel(range) { /// // Process each entry /// } /// ``` - pub fn iter_kernel_levels( - &self, - range: VRange, - levels: &'static [PageTableLevel], - ) -> impl Iterator { - self.iter_kernel_in(range, levels, self.root_table_page.allocator()) - } - - /// Iterates over the kernel space entries in the page table for the specified levels - /// with a given page allocator. - /// - /// # Parameters - /// - `range`: The virtual address range to iterate over. - /// - `levels`: A slice of `PageTableLevel` that specifies which levels of the page table - /// should be included in the iteration. Each level corresponds to a level in the page - /// table hierarchy, and the iterator will traverse entries at these levels. - /// - `alloc`: A page allocator that provides memory for the page table entries. - /// - /// # Returns - /// An iterator over mutable references to the page table entries (`M::Entry`) within the - /// specified range and levels. 
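
The old `KernelIterator`/`UserIterator` type parameter is gone: `iter_user` and `iter_kernel` now construct the same `PageTableIterator` and differ only in the `TableAttribute` used when filling missing intermediate tables. A runnable miniature of that design; bitflags 2.x is assumed, and the types are stand-ins for `TableAttribute` and the iterator:

```rust
// Miniature of the unified iterator design: one iterator type,
// parameterized by the attribute applied to newly created tables.
bitflags::bitflags! {
    #[derive(Clone, Copy, PartialEq, Eq, Debug)]
    struct TableAttr: usize {
        const PRESENT = 1 << 0;
        const USER    = 1 << 1;
        const GLOBAL  = 1 << 2;
    }
}

struct PteIter {
    fill_entry_attr: TableAttr, // applied to missing intermediate tables
}

impl PteIter {
    fn new(fill: TableAttr) -> Self {
        // PRESENT is always or-ed in, as `PageTableIterator::with_levels`
        // does below.
        Self { fill_entry_attr: fill.union(TableAttr::PRESENT) }
    }
}

fn main() {
    let user = PteIter::new(TableAttr::USER);     // like iter_user
    let kernel = PteIter::new(TableAttr::GLOBAL); // like iter_kernel
    assert!(user.fill_entry_attr.contains(TableAttr::PRESENT));
    assert!(kernel.fill_entry_attr.contains(TableAttr::GLOBAL | TableAttr::PRESENT));
}
```
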
- /// - /// # Example - /// ```no_run - /// let range = VRange::new(0x1234000, 0x1300000); - /// let levels = &M::LEVELS[..2]; - /// for pte in page_table.iter_kernel_in(range, levels, NoAlloc) { - /// // Process each entry - /// } - /// ``` - pub fn iter_kernel_in( - &self, - range: VRange, - levels: &'static [PageTableLevel], - alloc: A1, - ) -> impl Iterator { - let page_table_ptr = X::get_ptr_for_page(&self.root_table_page); + pub fn iter_kernel(&self, range: VRange) -> impl Iterator { + let page_table_ptr = self.access.get_ptr_for_page(&self.root_table_page); let root_page_table = unsafe { // SAFETY: `page_table_ptr` is a valid pointer to a page table. M::RawTable::from_ptr(page_table_ptr) }; - PageTableIterator::::with_levels( + PageTableIterator::::with_levels( root_page_table, range, - alloc, - levels, + TableAttribute::GLOBAL, + self.alloc.clone(), + self.access.clone(), + M::LEVELS, ) } - fn drop_page_table_recursive(page_table: &Page, levels: &[PageTableLevel]) { + fn drop_page_table_recursive(&self, page_table: &A::Folio, levels: &[PageTableLevel]) { let [level, remaining_levels @ ..] = levels else { return }; if remaining_levels.is_empty() { // We reached the last level, no need to go deeper. return; } - let alloc = page_table.allocator(); - - let page_table_ptr = X::get_ptr_for_page(page_table); + let page_table_ptr = self.access.get_ptr_for_page(page_table); let mut page_table = unsafe { // SAFETY: `page_table_ptr` is a valid pointer to a page table. M::RawTable::from_ptr(page_table_ptr) @@ -201,10 +172,10 @@ where let page_table = unsafe { // SAFETY: We got the pfn from a valid page table entry, so it should be valid. - Page::from_raw_in(pfn, alloc.clone()) + self.alloc.from_raw(pfn) }; - Self::drop_page_table_recursive(&page_table, remaining_levels); + self.drop_page_table_recursive(&page_table, remaining_levels); } } } @@ -213,10 +184,10 @@ impl<'a, M, A, X> Drop for PageTable<'a, M, A, X> where M: PagingMode, M::Entry: 'a, - A: PageAlloc, + A: PageTableAlloc, X: PageAccess, { fn drop(&mut self) { - Self::drop_page_table_recursive(&self.root_table_page, M::LEVELS); + self.drop_page_table_recursive(&self.root_table_page, M::LEVELS); } } diff --git a/crates/eonix_mm/src/page_table/pte.rs b/crates/eonix_mm/src/page_table/pte.rs index e067d207..c14d5738 100644 --- a/crates/eonix_mm/src/page_table/pte.rs +++ b/crates/eonix_mm/src/page_table/pte.rs @@ -1,6 +1,7 @@ -use crate::paging::PFN; use bitflags::bitflags; +use crate::paging::PFN; + bitflags! 
{ #[derive(Clone, Copy, PartialEq)] pub struct TableAttribute: usize { diff --git a/crates/eonix_mm/src/page_table/pte_iterator.rs b/crates/eonix_mm/src/page_table/pte_iterator.rs index 89b9fb9f..a9e4ff46 100644 --- a/crates/eonix_mm/src/page_table/pte_iterator.rs +++ b/crates/eonix_mm/src/page_table/pte_iterator.rs @@ -1,62 +1,14 @@ -use super::{ - pte::{RawAttribute, TableAttribute}, - PageTableLevel, PagingMode, RawPageTable as _, PTE, -}; -use crate::{ - address::{AddrOps as _, VRange}, - paging::{Page, PageAccess, PageAlloc}, -}; -use core::{marker::PhantomData}; - -pub struct KernelIterator; -pub struct UserIterator; - -pub trait IteratorType { - fn page_table_attributes() -> TableAttribute; - - fn get_page_table<'a, A, X>(pte: &mut M::Entry, alloc: &A) -> M::RawTable<'a> - where - A: PageAlloc, - X: PageAccess, - { - let attr = pte.get_attr().as_table_attr().expect("Not a page table"); - - if attr.contains(TableAttribute::PRESENT) { - let pfn = pte.get_pfn(); - unsafe { - // SAFETY: We are creating a pointer to a page referenced to in - // some page table, which should be valid. - let page_table_ptr = X::get_ptr_for_pfn(pfn); - // SAFETY: `page_table_ptr` is a valid pointer to a page table. - M::RawTable::from_ptr(page_table_ptr) - } - } else { - let page = Page::alloc_in(alloc.clone()); - let page_table_ptr = X::get_ptr_for_page(&page); - - unsafe { - // SAFETY: `page_table_ptr` is good for writing and properly aligned. - page_table_ptr.write_bytes(0, 1); - } - - pte.set( - page.into_raw(), - ::Attr::from(Self::page_table_attributes()), - ); - - unsafe { - // SAFETY: `page_table_ptr` is a valid pointer to a page table. - M::RawTable::from_ptr(page_table_ptr) - } - } - } -} +use super::page_table::PageTableAlloc; +use super::pte::{RawAttribute, TableAttribute}; +use super::{PageTableLevel, PagingMode, RawPageTable as _, PTE}; +use crate::address::{AddrOps as _, VRange}; +use crate::paging::{Folio, PageAccess}; -pub struct PageTableIterator<'a, M, A, X, K> +pub struct PageTableIterator<'a, M, A, X> where M: PagingMode, M::Entry: 'a, - A: PageAlloc, + A: PageTableAlloc, X: PageAccess, { /// Specifies the hierarchy of page table levels to iterate over. @@ -69,19 +21,19 @@ where indicies: [u16; 8], tables: [Option>; 8], + fill_entry_attr: TableAttribute, + alloc: A, - _phantom: PhantomData<&'a (X, K)>, + access: X, } -impl<'a, M, A, X, K> PageTableIterator<'a, M, A, X, K> +impl<'a, M, A, X> PageTableIterator<'a, M, A, X> where M: PagingMode, M::Entry: 'a, - A: PageAlloc, + A: PageTableAlloc, X: PageAccess, - K: IteratorType, { - fn parse_tables_starting_from(&mut self, idx_level: usize) { for (idx, &pt_idx) in self .indicies @@ -98,18 +50,58 @@ where }; let parent_table = parent_table.as_mut().expect("Parent table is None"); let next_pte = parent_table.index_mut(pt_idx); - child_table.replace(K::get_page_table::(next_pte, &self.alloc)); + + child_table.replace({ + let attr = next_pte + .get_attr() + .as_table_attr() + .expect("Not a page table"); + + if attr.contains(TableAttribute::PRESENT) { + let pfn = next_pte.get_pfn(); + unsafe { + // SAFETY: We are creating a pointer to a page referenced to in + // some page table, which should be valid. + let page_table_ptr = self.access.get_ptr_for_pfn(pfn); + // SAFETY: `page_table_ptr` is a valid pointer to a page table. 
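
The branch that follows is a get-or-create: a `PRESENT` entry yields the existing next-level table, and anything else gets a freshly allocated, zeroed table installed into the entry. The same shape in miniature, with heap boxes standing in for physical pages:

```rust
// Simplified model of the get-or-create logic in the PTE iterator:
// reuse the table a PRESENT entry points to, otherwise allocate a
// zeroed one and install it (mirroring the `write_bytes(0, 1)` call).
type Table = Box<[u64; 512]>;

struct Entry {
    present: bool,
    table: Option<Table>,
}

fn get_or_create(entry: &mut Entry) -> &mut Table {
    if !entry.present {
        // Freshly allocated page tables must be zeroed before use.
        entry.table = Some(Box::new([0u64; 512]));
        entry.present = true;
    }
    entry.table.as_mut().expect("present entry has a table")
}

fn main() {
    let mut e = Entry { present: false, table: None };
    let t = get_or_create(&mut e);
    assert_eq!(t[0], 0);
}
```
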
+ M::RawTable::from_ptr(page_table_ptr) + } + } else { + let page = self.alloc.alloc(); + let page_table_ptr = self.access.get_ptr_for_page(&page); + + unsafe { + // SAFETY: `page_table_ptr` is good for writing and properly aligned. + page_table_ptr.write_bytes(0, 1); + } + + next_pte.set(page.into_raw(), self.fill_entry_attr.into()); + + unsafe { + // SAFETY: `page_table_ptr` is a valid pointer to a page table. + M::RawTable::from_ptr(page_table_ptr) + } + } + }); } } - pub fn new(page_table: M::RawTable<'a>, range: VRange, alloc: A) -> Self { - Self::with_levels(page_table, range, alloc, M::LEVELS) + pub fn new( + page_table: M::RawTable<'a>, + range: VRange, + fill_entry_attr: TableAttribute, + alloc: A, + access: X, + ) -> Self { + Self::with_levels(page_table, range, fill_entry_attr, alloc, access, M::LEVELS) } pub fn with_levels( page_table: M::RawTable<'a>, range: VRange, + fill_entry_attr: TableAttribute, alloc: A, + access: X, levels: &'static [PageTableLevel], ) -> Self { let start = range.start().floor(); @@ -122,8 +114,9 @@ where remaining: (end - start) / last_level.page_size(), indicies: [0; 8], tables: [const { None }; 8], + fill_entry_attr: fill_entry_attr.union(TableAttribute::PRESENT), alloc, - _phantom: PhantomData, + access, }; for (i, level) in levels.iter().enumerate() { @@ -137,13 +130,12 @@ where } } -impl<'a, M, A, X, K> Iterator for PageTableIterator<'a, M, A, X, K> +impl<'a, M, A, X> Iterator for PageTableIterator<'a, M, A, X> where M: PagingMode, M::Entry: 'a, - A: PageAlloc, + A: PageTableAlloc, X: PageAccess, - K: IteratorType, { type Item = &'a mut M::Entry; @@ -178,15 +170,3 @@ where Some(retval) } } - -impl IteratorType for KernelIterator { - fn page_table_attributes() -> TableAttribute { - TableAttribute::PRESENT | TableAttribute::GLOBAL - } -} - -impl IteratorType for UserIterator { - fn page_table_attributes() -> TableAttribute { - TableAttribute::PRESENT | TableAttribute::USER - } -} diff --git a/crates/eonix_mm/src/page_table/walk.rs b/crates/eonix_mm/src/page_table/walk.rs new file mode 100644 index 00000000..aba80b09 --- /dev/null +++ b/crates/eonix_mm/src/page_table/walk.rs @@ -0,0 +1,210 @@ +use super::pte::{RawAttribute, TableAttribute}; +use super::{PageTableLevel, PTE}; +use crate::address::{AddrOps, VAddr, VRange}; +use crate::paging::PFN; + +pub enum WalkState { + Next, + Skip, + Break, +} + +pub trait PageTable: Sized { + type Entry: PTE; + const LEVELS: &'static [PageTableLevel]; + + fn index(&self, index: usize) -> &Self::Entry; + fn index_mut(&mut self, index: usize) -> &mut Self::Entry; + + fn from_pfn(pfn: PFN) -> Self; + unsafe fn take_pfn(pfn: PFN) -> Self; +} + +pub struct PageTableWalk<'a, T, D> +where + T: PageTable, +{ + levels: &'a [PageTableLevel], + fill_entry: &'a [fn(&mut D, &mut T::Entry) -> Option], + walk_entry: &'a [fn(&mut D, &mut T::Entry) -> WalkState], + data: D, +} + +fn try_get_table( + entry: &mut T::Entry, + data: &mut D, + fill_entry: fn(&mut D, &mut T::Entry) -> Option, +) -> Option +where + T: PageTable, +{ + let (mut pfn, attr) = entry.get(); + + // Always skip huge page entries + let attr = attr.as_table_attr()?; + + // For normal entries, check present flags + if !attr.contains(TableAttribute::PRESENT) { + // Skip entries filled with nothing + pfn = fill_entry(data, entry)?; + } + + Some(T::from_pfn(pfn)) +} + +fn _walk_page_table( + walk: &mut PageTableWalk, + cur_level: usize, + table: &mut T, + range: VRange, +) where + T: PageTable, +{ + let level = walk.levels[cur_level]; + + let page_size = 
level.page_size(); + let mut addr = range.start(); + + while addr < range.end() { + let idx = level.index_of(addr); + let entry = table.index_mut(idx); + + let mut next_table = None; + if cur_level < walk.levels.len() - 1 { + next_table = try_get_table(entry, &mut walk.data, walk.fill_entry[cur_level]); + } + + match ( + walk.walk_entry[cur_level](&mut walk.data, entry), + &mut next_table, + ) { + (WalkState::Break, _) => break, + (WalkState::Next, Some(next_table)) => _walk_page_table( + walk, + cur_level + 1, + next_table, + VRange::new(addr, range.end()), + ), + // `fill_entry` says that we shouldn't continue. + (WalkState::Next, None) => {} + _ => {} + } + + addr = addr.floor_to(page_size) + page_size; + } +} + +pub fn walk_page_table(walk: &mut PageTableWalk, table: &mut T, range: VRange) +where + T: PageTable, +{ + _walk_page_table(walk, 0, table, range); +} + +pub fn drop_user_page_table(mut root_page_table: T) +where + T: PageTable, +{ + fn walk(_: &mut (), entry: &mut T::Entry) -> WalkState { + let (pfn, attr) = entry.get(); + let Some(attr) = attr.as_table_attr() else { + return WalkState::Skip; + }; + + if !attr.contains(TableAttribute::USER) { + return WalkState::Skip; + } + + unsafe { + // Check `_walk_page_table`: We will and only will touch the next level of table with + // `next_table` holding a refcount. We take the table away from the parent table now. + T::take_pfn(pfn); + } + + entry.set(PFN::from_val(0), TableAttribute::empty().into()); + + if LEVEL == 2 { + WalkState::Skip + } else { + WalkState::Next + } + } + + let mut walk = PageTableWalk { + levels: T::LEVELS, + fill_entry: &[no_fill::, no_fill::, no_fill::], + walk_entry: &[walk::, walk::, walk::, skip_walk::], + data: (), + }; + + walk_page_table( + &mut walk, + &mut root_page_table, + VRange::new(VAddr::from(0), VAddr::from(0x0000_8000_0000_0000)), + ); +} + +pub fn iter_pte( + page_table: &mut T, + range: VRange, + fill_func: impl FnMut(&mut T::Entry) -> Option, + for_each: impl FnMut(&mut T::Entry), +) { + let walker = (fill_func, for_each); + + fn fill_entry( + (fill, _): &mut ( + impl FnMut(&mut T::Entry) -> Option, + impl FnMut(&mut T::Entry), + ), + entry: &mut T::Entry, + ) -> Option { + fill(entry) + } + + fn walk_entry( + (_, for_each): &mut ( + impl FnMut(&mut T::Entry) -> Option, + impl FnMut(&mut T::Entry), + ), + entry: &mut T::Entry, + ) -> WalkState { + for_each(entry); + WalkState::Next + } + + let mut walk = PageTableWalk { + levels: T::LEVELS, + fill_entry: &[fill_entry::, fill_entry::, fill_entry::], + walk_entry: &[ + cont_walk::, + cont_walk::, + cont_walk::, + walk_entry::, + ], + data: walker, + }; + + walk_page_table(&mut walk, page_table, range); +} + +pub fn no_fill(_: &mut D, _: &mut T::Entry) -> Option +where + T: PageTable, +{ + None +} + +pub fn skip_walk(_: &mut D, _: &mut T::Entry) -> WalkState +where + T: PageTable, +{ + WalkState::Skip +} + +pub fn cont_walk(_: &mut D, _: &mut T::Entry) -> WalkState +where + T: PageTable, +{ + WalkState::Next +} diff --git a/crates/eonix_mm/src/paging.rs b/crates/eonix_mm/src/paging.rs index 0c4811f2..f0166cf3 100644 --- a/crates/eonix_mm/src/paging.rs +++ b/crates/eonix_mm/src/paging.rs @@ -2,12 +2,10 @@ mod list; mod page; mod page_alloc; mod pfn; -mod raw_page; mod zone; -pub use list::{PageList, PageListSized}; -pub use page::{Page, PageAccess, PageBlock, PAGE_SIZE, PAGE_SIZE_BITS}; -pub use page_alloc::{GlobalPageAlloc, NoAlloc, PageAlloc}; +pub use list::{FolioList, FolioListSized}; +pub use page::{BasicFolio, Folio, PageAccess, 
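
The `WalkState` protocol drives the whole walker: the per-level callback returns `Next` to descend, `Skip` to ignore a subtree (as `drop_user_page_table` does for non-`USER` entries), or `Break` to stop the walk entirely. A miniature over a plain tree instead of a page table:

```rust
// Miniature of the `WalkState` protocol from walk.rs, applied to a
// plain tree rather than a real page table.
enum WalkState {
    Next,
    Skip,
    Break,
}

struct Node {
    value: u32,
    children: Vec<Node>,
}

// Returns false once the walk has been broken out of.
fn walk(node: &Node, visit: &mut impl FnMut(u32) -> WalkState) -> bool {
    for child in &node.children {
        match visit(child.value) {
            WalkState::Break => return false,
            WalkState::Skip => continue,
            WalkState::Next => {
                if !walk(child, visit) {
                    return false;
                }
            }
        }
    }
    true
}

fn main() {
    let tree = Node {
        value: 0,
        children: vec![
            Node { value: 1, children: vec![Node { value: 2, children: vec![] }] },
            Node { value: 3, children: vec![] },
        ],
    };
    let mut seen = vec![];
    walk(&tree, &mut |v| {
        seen.push(v);
        if v == 1 { WalkState::Skip } else { WalkState::Next }
    });
    assert_eq!(seen, [1, 3]); // the subtree under 1 was skipped
}
```
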
PageBlock, PAGE_SIZE, PAGE_SIZE_BITS}; +pub use page_alloc::{FrameAlloc, GlobalFrameAlloc}; pub use pfn::PFN; -pub use raw_page::{RawPage, UnmanagedRawPage}; pub use zone::Zone; diff --git a/crates/eonix_mm/src/paging/list.rs b/crates/eonix_mm/src/paging/list.rs index a52cf947..2dd557c9 100644 --- a/crates/eonix_mm/src/paging/list.rs +++ b/crates/eonix_mm/src/paging/list.rs @@ -1,16 +1,16 @@ -pub trait PageList { - type Page; +pub trait FolioList { + type Folio; fn is_empty(&self) -> bool; - fn peek_head(&mut self) -> Option<&mut Self::Page>; + fn peek_head(&mut self) -> Option<&mut Self::Folio>; - fn pop_head(&mut self) -> Option<&'static mut Self::Page>; - fn push_tail(&mut self, page: &'static mut Self::Page); - fn remove(&mut self, page: &mut Self::Page); + fn pop_head(&mut self) -> Option<&'static mut Self::Folio>; + fn push_tail(&mut self, page: &'static mut Self::Folio); + fn remove(&mut self, page: &mut Self::Folio); } -pub trait PageListSized: PageList + Sized { +pub trait FolioListSized: FolioList + Sized { const NEW: Self; fn new() -> Self { diff --git a/crates/eonix_mm/src/paging/page.rs b/crates/eonix_mm/src/paging/page.rs index c5a14b5e..8b067e43 100644 --- a/crates/eonix_mm/src/paging/page.rs +++ b/crates/eonix_mm/src/paging/page.rs @@ -1,6 +1,8 @@ -use super::{GlobalPageAlloc, PageAlloc, RawPage as _, PFN}; -use crate::address::{AddrRange, PAddr, PhysAccess}; -use core::{fmt, mem::ManuallyDrop, ptr::NonNull, sync::atomic::Ordering}; +use core::mem::ManuallyDrop; +use core::ptr::NonNull; + +use super::PFN; +use crate::address::{PAddr, PRange}; pub const PAGE_SIZE: usize = 4096; pub const PAGE_SIZE_BITS: u32 = PAGE_SIZE.trailing_zeros(); @@ -15,306 +17,81 @@ pub struct PageBlock([u8; PAGE_SIZE]); /// A trait that provides the kernel access to the page. #[doc(notable_trait)] -pub trait PageAccess { +pub trait PageAccess: Clone { /// Returns a kernel-accessible pointer to the page referenced by the given /// physical frame number. /// /// # Safety /// This function is unsafe because calling this function on some non-existing /// pfn will cause undefined behavior. - unsafe fn get_ptr_for_pfn(pfn: PFN) -> NonNull; + unsafe fn get_ptr_for_pfn(&self, pfn: PFN) -> NonNull; /// Returns a kernel-accessible pointer to the given page. - fn get_ptr_for_page(page: &Page) -> NonNull { + fn get_ptr_for_page(&self, page: &F) -> NonNull { unsafe { // SAFETY: `page.pfn()` is guaranteed to be valid. - Self::get_ptr_for_pfn(page.pfn()) + self.get_ptr_for_pfn(page.pfn()) } } } -/// A Page allocated in allocator `A`. -#[derive(PartialEq, Eq, PartialOrd, Ord)] -pub struct Page { - raw_page: A::RawPage, - alloc: A, -} - -unsafe impl Send for Page {} -unsafe impl Sync for Page {} - -impl Page -where - A: GlobalPageAlloc, -{ - /// Allocate a page of the given *order*. - pub fn alloc_order(order: u32) -> Self { - Self::alloc_order_in(order, A::global()) - } - - /// Allocate exactly one page. - pub fn alloc() -> Self { - Self::alloc_in(A::global()) - } +/// A [`Folio`] represents one page or a bunch of adjacent pages. +pub trait Folio { + /// Returns the physical frame number of the folio, which is aligned with + /// the folio's size and valid. + fn pfn(&self) -> PFN; - /// Allocate a contiguous block of pages that can contain at least `count` pages. - pub fn alloc_at_least(count: usize) -> Self { - Self::alloc_at_least_in(count, A::global()) - } + /// Returns the folio's *order* (log2 of the number of pages contained in + /// the folio). 
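
Nothing in this hunk shows how a `PageAccess::get_ptr_for_pfn` implementation translates a PFN; one plausible strategy (and what `ArchPhysAccess` and `PHYS_MAP_VIRT` elsewhere in this series suggest) is a linear physical map at a fixed virtual offset. A sketch under that assumption, with a made-up base address and pure pointer arithmetic, nothing dereferenced:

```rust
// One plausible `PageAccess` strategy (assumed, not taken from the
// patch): a linear physical map at a fixed virtual offset.
const PAGE_SIZE_BITS: u32 = 12;
const PHYS_MAP_BASE: usize = 0xffff_ff80_0000_0000; // hypothetical offset

#[derive(Clone)]
struct LinearMapAccess;

impl LinearMapAccess {
    /// # Safety
    /// `pfn` must refer to RAM covered by the linear map.
    unsafe fn get_ptr_for_pfn(&self, pfn: usize) -> *mut [u8; 4096] {
        (PHYS_MAP_BASE + (pfn << PAGE_SIZE_BITS)) as *mut [u8; 4096]
    }
}

fn main() {
    let access = LinearMapAccess;
    let ptr = unsafe { access.get_ptr_for_pfn(0x80000) };
    assert_eq!(ptr as usize, PHYS_MAP_BASE + 0x8000_0000);
}
```
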
+ fn order(&self) -> u32; - /// Acquire the ownership of the page pointed to by `pfn`, leaving `refcount` untouched. - /// - /// # Safety - /// This function is unsafe because it assumes that the caller has ensured that - /// `pfn` points to a valid page allocated through `alloc_order()` and that the - /// page have not been freed or deallocated yet. - /// - /// No checks are done. Any violation of this assumption may lead to undefined behavior. - pub unsafe fn from_raw_unchecked(pfn: PFN) -> Self { - unsafe { Self::from_raw_unchecked_in(pfn, A::global()) } - } - - /// Acquire the ownership of the page pointed to by `pfn`, leaving `refcount` untouched. - /// - /// This function is a safe wrapper around `from_paddr_unchecked()` that does **some sort - /// of** checks to ensure that the page is valid and managed by the allocator. - /// - /// # Panic - /// This function will panic if the page is not valid or if the page is not managed by - /// the allocator. - /// - /// # Safety - /// This function is unsafe because it assumes that the caller has ensured that - /// `pfn` points to an existing page (A.K.A. inside the global page array) and the - /// page will not be freed or deallocated during the call. - pub unsafe fn from_raw(pfn: PFN) -> Self { - unsafe { Self::from_raw_in(pfn, A::global()) } - } - - /// Do some work with the page without touching the reference count with the same - /// restrictions as `from_raw_in()`. - /// - /// # Safety - /// Check `from_raw()` for the safety requirements. - pub unsafe fn with_raw(pfn: PFN, func: F) -> O - where - F: FnOnce(&Self) -> O, - { - unsafe { Self::with_raw_in(pfn, A::global(), func) } - } - - /// Do some work with the page without touching the reference count with the same - /// restrictions as `from_raw_unchecked_in()`. - /// - /// # Safety - /// Check `from_raw_unchecked()` for the safety requirements. - pub unsafe fn with_raw_unchecked(pfn: PFN, func: F, alloc: A) -> O - where - F: FnOnce(&Self) -> O, - { - unsafe { Self::with_raw_unchecked_in(pfn, func, alloc) } - } -} - -impl Page -where - A: PageAlloc, -{ - /// Allocate a page of the given *order*. - pub fn alloc_order_in(order: u32, alloc: A) -> Self { - Self { - raw_page: alloc.alloc_order(order).expect("Out of memory"), - alloc, - } - } - - /// Allocate exactly one page. - pub fn alloc_in(alloc: A) -> Self { - Self { - raw_page: alloc.alloc().expect("Out of memory"), - alloc, - } - } - - /// Allocate a contiguous block of pages that can contain at least `count` pages. - pub fn alloc_at_least_in(count: usize, alloc: A) -> Self { - Self { - raw_page: alloc.alloc_at_least(count).expect("Out of memory"), - alloc, - } - } - - /// Acquire the ownership of the page pointed to by `pfn`, leaving `refcount` untouched. - /// - /// # Safety - /// This function is unsafe because it assumes that the caller has ensured that - /// `pfn` points to a valid page managed by `alloc` and that the page have not - /// been freed or deallocated yet. - /// - /// No checks are done. Any violation of this assumption may lead to undefined behavior. - pub unsafe fn from_raw_unchecked_in(pfn: PFN, alloc: A) -> Self { - Self { - raw_page: A::RawPage::from(pfn), - alloc, - } + /// Returns the total size of the folio in bytes. + fn len(&self) -> usize { + 1 << (self.order() + PAGE_SIZE_BITS) } - /// Acquire the ownership of the page pointed to by `pfn`, leaving `refcount` untouched. 
- /// - /// This function is a safe wrapper around `from_paddr_unchecked()` that does **some sort - /// of** checks to ensure that the page is valid and managed by the allocator. - /// - /// # Panic - /// This function will panic if the page is not valid or if the page is not managed by - /// the allocator. - /// - /// # Safety - /// This function is unsafe because it assumes that the caller has ensured that - /// `pfn` points to an existing page (A.K.A. inside the global page array) and the - /// page will not be freed or deallocated during the call. - pub unsafe fn from_raw_in(pfn: PFN, alloc: A) -> Self { - unsafe { - // SAFETY: The caller guarantees that the page is inside the global page array. - assert!(alloc.has_management_over(A::RawPage::from(pfn))); - - // SAFETY: We've checked that the validity of the page. And the caller guarantees - // that the page will not be freed or deallocated during the call. - Self::from_raw_unchecked_in(pfn, alloc) - } + /// Returns the start physical address of the folio, which is guaranteed to + /// be aligned to the folio's size and valid. + fn start(&self) -> PAddr { + PAddr::from(self.pfn()) } - /// Do some work with the page without touching the reference count with the same - /// restrictions as `from_raw_in()`. - /// - /// # Safety - /// Check `from_raw_in()` for the safety requirements. - pub unsafe fn with_raw_in(pfn: PFN, alloc: A, func: F) -> O - where - F: FnOnce(&Self) -> O, - { - unsafe { - let me = ManuallyDrop::new(Self::from_raw_in(pfn, alloc)); - func(&me) - } + /// Returns the physical address range of the ifolio, which is guaranteed to + /// be aligned to the folio's size and valid. + fn range(&self) -> PRange { + PRange::from(self.start()).grow(self.len()) } - /// Do some work with the page without touching the reference count with the same - /// restrictions as `from_raw_unchecked_in()`. - /// - /// # Safety - /// Check `from_raw_unchecked_in()` for the safety requirements. - pub unsafe fn with_raw_unchecked_in(pfn: PFN, func: F, alloc: A) -> O + /// Consumes the folio and returns the PFN without dropping the reference + /// count the folio holds. + fn into_raw(self) -> PFN where - F: FnOnce(&Self) -> O, + Self: Sized, { - unsafe { - let me = ManuallyDrop::new(Self::from_raw_unchecked_in(pfn, alloc)); - func(&me) - } - } - - /// Whether we are the only owner of the page. - pub fn is_exclusive(&self) -> bool { - self.raw_page.refcount().load(Ordering::Acquire) == 1 - } - - /// Returns the *order* of the page, which is the log2 of the number of pages - /// contained in the page object. - pub fn order(&self) -> u32 { - self.raw_page.order() - } - - /// Returns the total size of the page in bytes. - pub fn len(&self) -> usize { - 1 << (self.order() + PAGE_SIZE_BITS) - } - - /// Consumes the `Page` and returns the physical frame number without dropping - /// the reference count the page holds. - pub fn into_raw(self) -> PFN { let me = ManuallyDrop::new(self); me.pfn() } - - /// Returns the physical frame number of the page, which is aligned with the - /// page size and valid. - pub fn pfn(&self) -> PFN { - Into::::into(self.raw_page) - } - - /// Returns the start physical address of the page, which is guaranteed to be - /// aligned to the page size and valid. - pub fn start(&self) -> PAddr { - PAddr::from(self.pfn()) - } - - /// Returns the physical address range of the page, which is guaranteed to be - /// aligned to the page size and valid. 
- pub fn range(&self) -> AddrRange { - AddrRange::from(self.start()).grow(self.len()) - } - - /// Get the allocator that manages this page. - pub fn allocator(&self) -> &A { - &self.alloc - } } -impl Clone for Page -where - A: PageAlloc, -{ - fn clone(&self) -> Self { - // SAFETY: Memory order here can be Relaxed is for the same reason as that - // in the copy constructor of `std::shared_ptr`. - self.raw_page.refcount().fetch_add(1, Ordering::Relaxed); - - Self { - raw_page: self.raw_page, - alloc: self.alloc.clone(), - } - } +/// A simple [`Folio`] with no reference counting or other ownership mechanism. +#[derive(Clone)] +pub struct BasicFolio { + pfn: PFN, + order: u32, } -impl Drop for Page -where - A: PageAlloc, -{ - fn drop(&mut self) { - match self.raw_page.refcount().fetch_sub(1, Ordering::AcqRel) { - 0 => panic!("Refcount for an in-use page is 0"), - 1 => unsafe { - // SAFETY: `self.raw_page` points to a valid page inside the global page array. - assert!(self.alloc.has_management_over(self.raw_page)); - - // SAFETY: `self.raw_page` is managed by the allocator and we're dropping the page. - self.alloc.dealloc(self.raw_page) - }, - _ => {} - } +impl BasicFolio { + pub const fn new(pfn: PFN, order: u32) -> Self { + Self { pfn, order } } } -impl fmt::Debug for Page { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "Page({:?}, order={})", - Into::::into(self.raw_page), - self.order() - ) +impl Folio for BasicFolio { + fn pfn(&self) -> PFN { + self.pfn } -} -impl PageAccess for T -where - T: PhysAccess, -{ - unsafe fn get_ptr_for_pfn(pfn: PFN) -> NonNull { - unsafe { - // SAFETY: The physical address of a existing page must be - // aligned to the page size. - T::as_ptr(PAddr::from(pfn)) - } + fn order(&self) -> u32 { + self.order } } diff --git a/crates/eonix_mm/src/paging/page_alloc.rs b/crates/eonix_mm/src/paging/page_alloc.rs index fe222605..267d3ccb 100644 --- a/crates/eonix_mm/src/paging/page_alloc.rs +++ b/crates/eonix_mm/src/paging/page_alloc.rs @@ -1,89 +1,44 @@ -use super::{raw_page::UnmanagedRawPage, RawPage}; +use super::Folio; -/// A trait for allocating and deallocating pages of memory. +/// A trait for allocating and deallocating folios. /// /// Note that the instances of this trait should provide pointer-like or reference-like /// behavior, meaning that the allocators are to be passed around by value and stored in /// managed data structures. This is because the allocator may be used to deallocate the /// pages it allocates. -#[doc(notable_trait)] -pub trait PageAlloc: Clone { - type RawPage: RawPage; +pub trait FrameAlloc: Clone { + type Folio: Folio; - /// Allocate a page of the given *order*. - fn alloc_order(&self, order: u32) -> Option; + /// Allocate a folio of the given *order*. + fn alloc_order(&self, order: u32) -> Option; - /// Allocate exactly one page. - fn alloc(&self) -> Option { + /// Allocate exactly one folio. + fn alloc(&self) -> Option { self.alloc_order(0) } - /// Allocate a contiguous block of pages that can contain at least `count` pages. - fn alloc_at_least(&self, count: usize) -> Option { + /// Allocate a folio that can contain at least [`count`] contiguous pages. + fn alloc_at_least(&self, count: usize) -> Option { let order = count.next_power_of_two().trailing_zeros(); self.alloc_order(order) } - - /// Deallocate a page. - /// - /// # Safety - /// This function is unsafe because it assumes that the caller MUST ensure that - /// `raw_page` is allocated in this allocator and never used after this call. 
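The derived methods on the new `Folio` trait all reduce to `pfn()` and `order()`. A minimal sketch of how they compose, outside the patch; it assumes only the `eonix_mm` items shown above and the `From<usize>` conversion for `PFN` that the patch itself uses:

    use eonix_mm::paging::{BasicFolio, Folio as _, PAGE_SIZE, PFN};

    fn folio_geometry() {
        // An order-2 folio spans 2^2 = 4 pages.
        let folio = BasicFolio::new(PFN::from(0x1000), 2);
        assert_eq!(folio.len(), 4 * PAGE_SIZE);
        // `start()` and `range()` are both derived from `pfn()` and `order()`.
        let _span = folio.range();
    }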
diff --git a/crates/eonix_mm/src/paging/page_alloc.rs b/crates/eonix_mm/src/paging/page_alloc.rs
index fe222605..267d3ccb 100644
--- a/crates/eonix_mm/src/paging/page_alloc.rs
+++ b/crates/eonix_mm/src/paging/page_alloc.rs
@@ -1,89 +1,44 @@
-use super::{raw_page::UnmanagedRawPage, RawPage};
+use super::Folio;
 
-/// A trait for allocating and deallocating pages of memory.
+/// A trait for allocating and deallocating folios.
 ///
 /// Note that the instances of this trait should provide pointer-like or reference-like
 /// behavior, meaning that the allocators are to be passed around by value and stored in
 /// managed data structures. This is because the allocator may be used to deallocate the
 /// pages it allocates.
-#[doc(notable_trait)]
-pub trait PageAlloc: Clone {
-    type RawPage: RawPage;
+pub trait FrameAlloc: Clone {
+    type Folio: Folio;
 
-    /// Allocate a page of the given *order*.
-    fn alloc_order(&self, order: u32) -> Option<Self::RawPage>;
+    /// Allocate a folio of the given *order*.
+    fn alloc_order(&self, order: u32) -> Option<Self::Folio>;
 
-    /// Allocate exactly one page.
-    fn alloc(&self) -> Option<Self::RawPage> {
+    /// Allocate exactly one folio.
+    fn alloc(&self) -> Option<Self::Folio> {
         self.alloc_order(0)
     }
 
-    /// Allocate a contiguous block of pages that can contain at least `count` pages.
-    fn alloc_at_least(&self, count: usize) -> Option<Self::RawPage> {
+    /// Allocate a folio that can contain at least [`count`] contiguous pages.
+    fn alloc_at_least(&self, count: usize) -> Option<Self::Folio> {
         let order = count.next_power_of_two().trailing_zeros();
         self.alloc_order(order)
    }
-
-    /// Deallocate a page.
-    ///
-    /// # Safety
-    /// This function is unsafe because it assumes that the caller MUST ensure that
-    /// `raw_page` is allocated in this allocator and never used after this call.
-    unsafe fn dealloc(&self, raw_page: Self::RawPage);
-
-    /// Check whether the page is allocated and managed by the allocator.
-    fn has_management_over(&self, page_ptr: Self::RawPage) -> bool;
 }
 
 /// A trait for global page allocators.
 ///
 /// Global means that we can get an instance of the allocator from anywhere in the kernel.
-#[doc(notable_trait)]
-pub trait GlobalPageAlloc: PageAlloc + 'static {
-    /// Get the global page allocator.
-    fn global() -> Self;
+pub trait GlobalFrameAlloc: FrameAlloc + 'static {
+    /// The global page allocator.
+    const GLOBAL: Self;
 }
 
-#[derive(Clone)]
-pub struct NoAlloc;
-
-impl<'a, A> PageAlloc for &'a A
+impl<'a, A> FrameAlloc for &'a A
 where
-    A: PageAlloc,
+    A: FrameAlloc,
 {
-    type RawPage = A::RawPage;
+    type Folio = A::Folio;
 
-    fn alloc_order(&self, order: u32) -> Option<Self::RawPage> {
+    fn alloc_order(&self, order: u32) -> Option<Self::Folio> {
         (*self).alloc_order(order)
     }
-
-    unsafe fn dealloc(&self, raw_page: Self::RawPage) {
-        unsafe { (*self).dealloc(raw_page) }
-    }
-
-    fn has_management_over(&self, raw_page: Self::RawPage) -> bool {
-        (*self).has_management_over(raw_page)
-    }
-}
-
-impl PageAlloc for NoAlloc {
-    type RawPage = UnmanagedRawPage;
-
-    fn alloc_order(&self, _: u32) -> Option<Self::RawPage> {
-        panic!("`NoAlloc` cannot allocate pages");
-    }
-
-    unsafe fn dealloc(&self, _: Self::RawPage) {
-        panic!("`NoAlloc` cannot free pages");
-    }
-
-    fn has_management_over(&self, _: Self::RawPage) -> bool {
-        true
-    }
-}
-
-impl GlobalPageAlloc for NoAlloc {
-    fn global() -> Self {
-        Self
-    }
 }
diff --git a/crates/eonix_mm/src/paging/raw_page.rs b/crates/eonix_mm/src/paging/raw_page.rs
deleted file mode 100644
index 789e863b..00000000
--- a/crates/eonix_mm/src/paging/raw_page.rs
+++ /dev/null
@@ -1,47 +0,0 @@
-use core::sync::atomic::AtomicUsize;
-
-use super::PFN;
-
-/// A `RawPage` represents a page of memory in the kernel. It is a low-level
-/// representation of a page that is used by the kernel to manage memory.
-#[doc(notable_trait)]
-pub trait RawPage: Clone + Copy + From<PFN> + Into<PFN> {
-    fn order(&self) -> u32;
-    fn refcount(&self) -> &AtomicUsize;
-}
-
-#[derive(Clone, Copy)]
-pub struct UnmanagedRawPage(PFN, u32);
-
-/// Unmanaged raw pages should always have a non-zero refcount to
-/// avoid `free()` from being called.
-static UNMANAGED_RAW_PAGE_CLONE_COUNT: AtomicUsize = AtomicUsize::new(1);
-
-impl UnmanagedRawPage {
-    pub const fn new(pfn: PFN, order: u32) -> Self {
-        Self(pfn, order)
-    }
-}
-
-impl From<PFN> for UnmanagedRawPage {
-    fn from(value: PFN) -> Self {
-        Self::new(value, 0)
-    }
-}
-
-impl Into<PFN> for UnmanagedRawPage {
-    fn into(self) -> PFN {
-        let Self(pfn, _) = self;
-        pfn
-    }
-}
-
-impl RawPage for UnmanagedRawPage {
-    fn order(&self) -> u32 {
-        self.1
-    }
-
-    fn refcount(&self) -> &AtomicUsize {
-        &UNMANAGED_RAW_PAGE_CLONE_COUNT
-    }
-}
diff --git a/crates/eonix_mm/src/paging/zone.rs b/crates/eonix_mm/src/paging/zone.rs
index ec3ed15e..a2e85343 100644
--- a/crates/eonix_mm/src/paging/zone.rs
+++ b/crates/eonix_mm/src/paging/zone.rs
@@ -1,7 +1,6 @@
-use core::cell::UnsafeCell;
+use core::ptr::NonNull;
 
-#[allow(unused_imports)]
-use super::{Page, PageAlloc, RawPage, PFN};
+use super::PFN;
 use crate::address::PRange;
 
 /// A [`Zone`] holds a lot of [`Page`]s that share the same NUMA node or
@@ -16,5 +15,5 @@ pub trait Zone: Send + Sync {
     ///
     /// # Return
     /// [`None`] if [`pfn`] is not in this [`Zone`].
-    fn get_page(&self, pfn: PFN) -> Option<&UnsafeCell<Self::Page>>;
+    fn get_page(&self, pfn: PFN) -> Option<NonNull<Self::Page>>;
 }
diff --git a/crates/slab_allocator/src/lib.rs b/crates/slab_allocator/src/lib.rs
index 8597331d..c3e7f392 100644
--- a/crates/slab_allocator/src/lib.rs
+++ b/crates/slab_allocator/src/lib.rs
@@ -2,7 +2,7 @@
 
 use core::ptr::NonNull;
 
-use eonix_mm::paging::{PageList, PageListSized};
+use eonix_mm::paging::{FolioList, FolioListSized};
 use eonix_sync::Spin;
 
 #[repr(C)]
@@ -84,21 +84,21 @@ where
     }
 }
 
-pub trait SlabPageAlloc {
+/// Allocate a page suitable for slab system use. The page MUST come with
+/// its allocation count 0 and next free slot None.
+///
+/// # Safety
+/// Implementations MUST hand out pages that are properly initialized at
+/// allocation time.
+pub unsafe trait SlabPageAlloc {
     type Page: SlabPage;
-    type PageList: PageList;
+    type PageList: FolioList;
 
-    /// Allocate a page suitable for slab system use. The page MUST come with
-    /// its allocation count 0 and next free slot None.
-    ///
-    /// # Safety
-    /// The page returned MUST be properly initialized before its usage.
-    unsafe fn alloc_uninit(&self) -> &'static mut Self::Page;
+    fn alloc_slab_page(&self) -> &'static mut Self::Page;
 }
 
 pub(crate) struct SlabList<T>
 where
-    T: PageList,
+    T: FolioList,
 {
     empty_list: T,
     partial_list: T,
@@ -120,7 +120,7 @@ unsafe impl Sync for SlabAlloc where P: SlabPag
 impl<L> SlabAlloc<L>
 where
     L: SlabPageAlloc,
-    L::PageList: PageListSized,
+    L::PageList: FolioListSized,
 {
     pub fn new_in(alloc: L) -> Self {
         Self {
@@ -148,7 +148,7 @@ where
 impl<T> SlabList<T>
 where
-    T: PageListSized,
+    T: FolioListSized,
 {
     const fn new(object_size: usize) -> Self {
         Self {
@@ -162,8 +162,8 @@ where
 impl<T> SlabList<T>
 where
-    T: PageList,
-    T::Page: SlabPage,
+    T: FolioList,
+    T::Folio: SlabPage,
 {
     fn alloc_from_partial(&mut self) -> NonNull<u8> {
         let head = self.partial_list.peek_head().unwrap();
@@ -190,18 +190,16 @@ where
         slot
     }
 
-    fn charge(&mut self, alloc: &impl SlabPageAlloc<Page = T::Page>) {
-        unsafe {
-            let slab = alloc.alloc_uninit();
-            let free_slot = make_slab_page(slab.get_data_ptr(), self.object_size);
+    fn charge(&mut self, alloc: &impl SlabPageAlloc<Page = T::Folio>) {
+        let slab = alloc.alloc_slab_page();
+        let free_slot = make_slab_page(slab.get_data_ptr(), self.object_size);
 
-            slab.set_free_slot(Some(free_slot));
+        slab.set_free_slot(Some(free_slot));
 
-            self.empty_list.push_tail(slab);
-        }
+        self.empty_list.push_tail(slab);
     }
 
-    fn alloc(&mut self, alloc: &impl SlabPageAlloc<Page = T::Page>) -> NonNull<u8> {
+    fn alloc(&mut self, alloc: &impl SlabPageAlloc<Page = T::Folio>) -> NonNull<u8> {
         if !self.partial_list.is_empty() {
             return self.alloc_from_partial();
         }
@@ -216,7 +214,7 @@ where
     unsafe fn dealloc(&mut self, ptr: NonNull<u8>, _alloc: &impl SlabPageAlloc) {
         let slab_page = unsafe {
             // SAFETY:
-            <T::Page as SlabPage>::from_allocated(ptr)
+            <T::Folio as SlabPage>::from_allocated(ptr)
         };
 
         let (was_full, is_empty);
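For the `unsafe trait` change above: the initialization obligation moves from every call site to the implementor. A standalone sketch of the pattern, in plain Rust with a heap-backed stand-in rather than the kernel's types:

    unsafe trait SlabPageSource {
        // Implementors promise the returned storage is fully initialized.
        fn alloc_slab_page(&self) -> &'static mut [u8; 4096];
    }

    struct HeapSource;

    // SAFETY: `Box::leak(Box::new(...))` hands out zero-initialized, valid storage.
    unsafe impl SlabPageSource for HeapSource {
        fn alloc_slab_page(&self) -> &'static mut [u8; 4096] {
            Box::leak(Box::new([0u8; 4096]))
        }
    }

    fn main() {
        assert_eq!(HeapSource.alloc_slab_page()[0], 0);
    }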
diff --git a/src/driver/ahci/command.rs b/src/driver/ahci/command.rs
index c83339b7..4609d38d 100644
--- a/src/driver/ahci/command.rs
+++ b/src/driver/ahci/command.rs
@@ -1,9 +1,11 @@
+use eonix_mm::paging::Folio as _;
+
 use crate::kernel::constants::EINVAL;
-use crate::kernel::mem::paging::Page;
+use crate::kernel::mem::Folio;
 use crate::prelude::*;
 
 pub trait Command {
-    fn pages(&self) -> &[Page];
+    fn pages(&self) -> &[Folio];
 
     fn lba(&self) -> u64; // in sectors
 
@@ -14,19 +16,19 @@
 }
 
 pub struct IdentifyCommand {
-    page: Page,
+    page: Folio,
 }
 
 impl IdentifyCommand {
     pub fn new() -> Self {
         Self {
-            page: Page::alloc(),
+            page: Folio::alloc(),
         }
     }
 }
 
 impl Command for IdentifyCommand {
-    fn pages(&self) -> &[Page] {
+    fn pages(&self) -> &[Folio] {
         core::slice::from_ref(&self.page)
     }
 
@@ -47,14 +49,14 @@
     }
 }
 
-pub struct ReadLBACommand<'lt> {
-    pages: &'lt [Page],
+pub struct ReadLBACommand<'a> {
+    pages: &'a [Folio],
     lba: u64,
     count: u16,
 }
 
-impl<'lt> ReadLBACommand<'lt> {
-    pub fn new(pages: &'lt [Page], lba: u64, count: u16) -> KResult<Self> {
+impl<'a> ReadLBACommand<'a> {
+    pub fn new(pages: &'a [Folio], lba: u64, count: u16) -> KResult<Self> {
         if pages.len() > 248 {
             return Err(EINVAL);
         }
@@ -69,7 +71,7 @@
 }
 
 impl Command for ReadLBACommand<'_> {
-    fn pages(&self) -> &[Page] {
+    fn pages(&self) -> &[Folio] {
         self.pages
     }
 
@@ -91,13 +93,13 @@
 }
 
 pub struct WriteLBACommand<'a> {
-    pages: &'a [Page],
+    pages: &'a [Folio],
     lba: u64,
     count: u16,
 }
 
 impl<'a> WriteLBACommand<'a> {
-    pub fn new(pages: &'a [Page], lba: u64, count: u16) -> KResult<Self> {
+    pub fn new(pages: &'a [Folio], lba: u64, count: u16) -> KResult<Self> {
         if pages.len() > 248 {
             return Err(EINVAL);
         }
@@ -112,7 +114,7 @@
 }
 
 impl Command for WriteLBACommand<'_> {
-    fn pages(&self) -> &[Page] {
+    fn pages(&self) -> &[Folio] {
         self.pages
     }
diff --git a/src/driver/ahci/command_table.rs b/src/driver/ahci/command_table.rs
index 7b78d26f..00fc8a0b 100644
--- a/src/driver/ahci/command_table.rs
+++ b/src/driver/ahci/command_table.rs
@@ -1,13 +1,14 @@
 use core::ptr::NonNull;
 
 use eonix_mm::address::PAddr;
+use eonix_mm::paging::Folio as _;
 
 use super::command::Command;
 use super::{PRDTEntry, FISH2D};
-use crate::kernel::mem::{Page, PageExt};
+use crate::kernel::mem::FolioOwned;
 
 pub struct CommandTable {
-    page: Page,
+    page: FolioOwned,
     cmd_fis: NonNull<FISH2D>,
     prdt: NonNull<[PRDTEntry; 248]>,
     prdt_entries: usize,
@@ -18,7 +19,7 @@ unsafe impl Sync for CommandTable {}
 
 impl CommandTable {
     pub fn new() -> Self {
-        let page = Page::alloc();
+        let page = FolioOwned::alloc();
         let base = page.get_ptr();
 
         unsafe {
diff --git a/src/driver/ahci/defs.rs b/src/driver/ahci/defs.rs
index c5440246..66841da8 100644
--- a/src/driver/ahci/defs.rs
+++ b/src/driver/ahci/defs.rs
@@ -1,7 +1,9 @@
 #![allow(dead_code)]
 
-use crate::kernel::mem::paging::Page;
 use eonix_mm::address::Addr as _;
+use eonix_mm::paging::Folio as _;
+
+use crate::kernel::mem::Folio;
 
 pub const VENDOR_INTEL: u16 = 0x8086;
 pub const DEVICE_AHCI: u16 = 0x2922;
@@ -239,7 +241,7 @@ pub struct PRDTEntry {
 }
 
 impl PRDTEntry {
-    pub fn setup(&mut self, page: &Page) {
+    pub fn setup(&mut self, page: &Folio) {
         self.base = page.start().addr() as u64;
 
         self._reserved1 = 0;
diff --git a/src/driver/ahci/slot.rs b/src/driver/ahci/slot.rs
index 06c6f2ec..dd096f57 100644
--- a/src/driver/ahci/slot.rs
+++ b/src/driver/ahci/slot.rs
@@ -3,18 +3,18 @@ use core::ptr::NonNull;
 use core::task::{Poll, Waker};
 
 use eonix_mm::address::{Addr as _, PAddr};
+use eonix_mm::paging::Folio as _;
 use eonix_sync::{Spin, SpinIrq as _};
 
 use super::command_table::CommandTable;
 use super::CommandHeader;
 use crate::kernel::constants::EIO;
-use crate::kernel::mem::paging::AllocZeroed;
-use crate::kernel::mem::{Page, PageExt};
+use crate::kernel::mem::FolioOwned;
 use crate::KResult;
 
 pub struct CommandList {
     base: NonNull<CommandHeader>,
-    _page: Page,
+    _page: FolioOwned,
 }
 
 unsafe impl Send for CommandList {}
@@ -75,7 +75,9 @@ impl CommandList {
     }
 
     pub fn new() -> Self {
-        let page = Page::zeroed();
+        let mut page = FolioOwned::alloc();
+        page.as_bytes_mut().fill(0);
+
         let base = page.get_ptr();
 
         let controls_ptr = Self::controls_ptr(base);
diff --git a/src/driver/e1000e.rs b/src/driver/e1000e.rs
index 73143c2c..6d6ca353 100644
--- a/src/driver/e1000e.rs
+++ b/src/driver/e1000e.rs
@@ -5,11 +5,12 @@ use core::ptr::NonNull;
 use async_trait::async_trait;
 use eonix_hal::fence::memory_barrier;
 use eonix_mm::address::{Addr, PAddr};
+use eonix_mm::paging::Folio as _;
 use eonix_sync::SpinIrq;
 
 use crate::kernel::constants::{EAGAIN, EFAULT, EINVAL, EIO};
 use crate::kernel::interrupt::register_irq_handler;
-use crate::kernel::mem::{PageExcl, PageExt, PhysAccess};
+use crate::kernel::mem::{FolioOwned, PhysAccess};
 use crate::kernel::pcie::{self, Header, PCIDevice, PCIDriver, PciError};
 use crate::net::netdev;
 use crate::prelude::*;
@@ -54,13 +55,13 @@ struct E1000eDev {
     id: u32,
     regs: Registers,
 
-    rt_desc_page: PageExcl,
+    rt_desc_page: FolioOwned,
     rx_head: Option,
     rx_tail: Option,
     tx_tail: Option,
 
-    rx_buffers: Box<[PageExcl; RX_DESC_SIZE]>,
-    tx_buffers: Box<[Option<PageExcl>; TX_DESC_SIZE]>,
+    rx_buffers: Box<[FolioOwned; RX_DESC_SIZE]>,
+    tx_buffers: Box<[Option<FolioOwned>; TX_DESC_SIZE]>,
 }
 
 fn test(val: u32, bit: u32) -> bool {
@@ -227,7 +228,7 @@ impl netdev::Netdev for E1000eDev {
             return Err(EIO);
         }
 
-        let mut buffer_page = PageExcl::alloc();
+        let mut buffer_page = FolioOwned::alloc();
         if buf.len() > buffer_page.len() {
             return Err(EFAULT);
         }
@@ -363,11 +364,15 @@ impl E1000eDev {
             speed: netdev::LinkSpeed::SpeedUnknown,
             id: netdev::alloc_id(),
             regs,
-            rt_desc_page: PageExcl::zeroed(),
+            rt_desc_page: {
+                let mut folio = FolioOwned::alloc();
+                folio.as_bytes_mut().fill(0);
+                folio
+            },
             rx_head: None,
             rx_tail: None,
             tx_tail: None,
-            rx_buffers: Box::new(core::array::from_fn(|_| PageExcl::alloc_order(2))),
+            rx_buffers: Box::new(core::array::from_fn(|_| FolioOwned::alloc_order(2))),
             tx_buffers: Box::new([const { None }; 32]),
         };
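The DMA paths here and in the virtio driver below lean on the `into_raw`/`from_raw` pairing: ownership is leaked to the device on allocation and reclaimed exactly once on deallocation. A sketch of the same protocol on plain `Box`, for comparison only:

    fn dma_roundtrip() {
        let buffer: Box<[u8; 4096]> = Box::new([0; 4096]);
        // Hand the buffer to the "device"; Rust stops tracking it here.
        let raw = Box::into_raw(buffer);
        // ... the device reads or writes the memory ...
        // Reclaim exactly once; never reclaiming leaks, reclaiming twice is UB.
        let _buffer = unsafe { Box::from_raw(raw) };
    }

    fn main() {
        dma_roundtrip();
    }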
diff --git a/src/driver/virtio/virtio_blk.rs b/src/driver/virtio/virtio_blk.rs
index c5a3c3d2..5dfed88a 100644
--- a/src/driver/virtio/virtio_blk.rs
+++ b/src/driver/virtio/virtio_blk.rs
@@ -3,7 +3,7 @@ use alloc::boxed::Box;
 use async_trait::async_trait;
 use eonix_hal::mm::ArchPhysAccess;
 use eonix_mm::address::{Addr, PAddr, PhysAccess};
-use eonix_mm::paging::PFN;
+use eonix_mm::paging::{Folio as _, PFN};
 use eonix_sync::Spin;
 use virtio_drivers::device::blk::VirtIOBlk;
 use virtio_drivers::transport::Transport;
@@ -12,7 +12,7 @@ use virtio_drivers::Hal;
 use crate::io::Chunks;
 use crate::kernel::block::{BlockDeviceRequest, BlockRequestQueue};
 use crate::kernel::constants::EIO;
-use crate::kernel::mem::{Page, PageExt};
+use crate::kernel::mem::Folio;
 use crate::prelude::KResult;
 
 pub struct HAL;
@@ -22,7 +22,7 @@ unsafe impl Hal for HAL {
         pages: usize,
         _direction: virtio_drivers::BufferDirection,
     ) -> (virtio_drivers::PhysAddr, core::ptr::NonNull<u8>) {
-        let page = Page::alloc_at_least(pages);
+        let page = Folio::alloc_at_least(pages);
         let ptr = page.get_ptr();
         let pfn = page.into_raw();
 
@@ -40,7 +40,7 @@
         unsafe {
             // SAFETY: The caller ensures that the pfn corresponds to a valid
             //         page allocated by `dma_alloc`.
-            Page::from_raw(pfn);
+            Folio::from_raw(pfn);
         }
 
         0
diff --git a/src/fs/fat32.rs b/src/fs/fat32.rs
index b19c9908..c1feebdf 100644
--- a/src/fs/fat32.rs
+++ b/src/fs/fat32.rs
@@ -1,5 +1,4 @@
 mod dir;
-mod file;
 
 use alloc::sync::Arc;
 use core::ops::Deref;
@@ -13,7 +12,7 @@ use itertools::Itertools;
 use crate::io::{Buffer, ByteBuffer, UninitBuffer};
 use crate::kernel::block::{BlockDevice, BlockDeviceRequest};
 use crate::kernel::constants::{EINVAL, EIO};
-use crate::kernel::mem::{CachePage, Page, PageExcl, PageExt, PageOffset};
+use crate::kernel::mem::{CachePage, Folio, FolioOwned, PageOffset};
 use crate::kernel::timer::Instant;
 use crate::kernel::vfs::dentry::Dentry;
 use crate::kernel::vfs::inode::{Ino, InodeInfo, InodeOps, InodeUse};
@@ -114,7 +113,7 @@ struct FatFs {
 }
 
 impl SuperBlock for FatFs {}
 
 impl FatFs {
-    async fn read_cluster(&self, mut cluster: Cluster, buf: &Page) -> KResult<()> {
+    async fn read_cluster(&self, mut cluster: Cluster, buf: &Folio) -> KResult<()> {
         cluster = cluster.normalized();
 
         let rq = BlockDeviceRequest::Read {
@@ -278,7 +277,6 @@ impl InodeOps for FileInode {
             .next()
             .ok_or(EIO)?;
 
-        let page = page.get_page();
         fs.read_cluster(cluster, &page).await?;
 
         let real_len = (inode.info.lock().size as usize) - offset.byte_count();
@@ -293,7 +291,7 @@
 struct DirInode {
     // TODO: Use the new PageCache...
-    dir_pages: RwLock<Vec<PageExcl>>,
+    dir_pages: RwLock<Vec<FolioOwned>>,
 }
 
 impl DirInode {
@@ -330,7 +328,7 @@
         let clusters = ClusterIterator::new(fat.as_ref(), Cluster::from_ino(inode.ino));
 
         for cluster in clusters {
-            let page = PageExcl::alloc();
+            let page = FolioOwned::alloc();
             fs.read_cluster(cluster, &page).await?;
 
             dir_pages.push(page);
@@ -343,7 +341,7 @@
         &self,
         sb: &SbUse,
         inode: &InodeUse,
-    ) -> KResult<impl Deref<Target = Vec<PageExcl>> + use<'_>> {
+    ) -> KResult<impl Deref<Target = Vec<FolioOwned>> + use<'_>> {
         {
             let dir_pages = self.dir_pages.read().await;
             if !dir_pages.is_empty() {
diff --git a/src/fs/fat32/file.rs b/src/fs/fat32/file.rs
deleted file mode 100644
index 2df69728..00000000
--- a/src/fs/fat32/file.rs
+++ /dev/null
@@ -1,24 +0,0 @@
-use futures::Stream;
-
-use crate::{kernel::mem::Page, prelude::KResult};
-
-use super::{ClusterIterator, FatFs};
-
-pub trait ReadClusters {
-    fn read_clusters(self, fs: &FatFs) -> impl Stream<Item = KResult<Page>> + Send;
-}
-
-impl ReadClusters for ClusterIterator<'_> {
-    fn read_clusters(self, fs: &FatFs) -> impl Stream<Item = KResult<Page>> + Send {
-        futures::stream::unfold(self, move |mut me| async {
-            let cluster = me.next()?;
-            let page = Page::alloc();
-
-            if let Err(err) = fs.read_cluster(cluster, &page).await {
-                return Some((Err(err), me));
-            }
-
-            Some((Ok(page), me))
-        })
-    }
-}
diff --git a/src/fs/tmpfs/file.rs b/src/fs/tmpfs/file.rs
index a1755908..d560a672 100644
--- a/src/fs/tmpfs/file.rs
+++ b/src/fs/tmpfs/file.rs
@@ -125,7 +125,7 @@ impl InodeOps for FileInode {
         page: &mut CachePage,
         _: PageOffset,
     ) -> KResult<()> {
-        page.as_bytes_mut().fill(0);
+        page.lock().as_bytes_mut().fill(0);
 
         Ok(())
     }
diff --git a/src/kernel/block.rs b/src/kernel/block.rs
index 8e017336..be2146f8 100644
--- a/src/kernel/block.rs
+++ b/src/kernel/block.rs
@@ -8,8 +8,7 @@ use async_trait::async_trait;
 use mbr::MBRPartTable;
 
 use super::constants::ENOENT;
-use super::mem::paging::Page;
-use super::mem::PageExt;
+use super::mem::Folio;
 use super::vfs::types::DeviceId;
 use crate::io::{Buffer, Chunks, FillResult};
 use crate::kernel::constants::{EEXIST, EINVAL};
@@ -202,15 +201,15 @@ impl BlockDevice {
         let (page_slice, page, mut page_vec);
         match nr_batch {
             ..=8 => {
-                page = Page::alloc();
+                page = Folio::alloc();
                 page_slice = core::slice::from_ref(&page);
             }
             ..=16 => {
-                page = Page::alloc_order(1);
+                page = Folio::alloc_order(1);
                 page_slice = core::slice::from_ref(&page);
             }
             ..=32 => {
-                page = Page::alloc_order(2);
+                page = Folio::alloc_order(2);
                 page_slice = core::slice::from_ref(&page);
             }
             count => {
@@ -220,8 +219,8 @@
                 let nr_pages = nr_huge_pages + nr_small_pages;
 
                 page_vec = Vec::with_capacity(nr_pages);
-                page_vec.resize_with(nr_huge_pages, || Page::alloc_order(2));
-                page_vec.resize_with(nr_pages, || Page::alloc());
+                page_vec.resize_with(nr_huge_pages, || Folio::alloc_order(2));
+                page_vec.resize_with(nr_pages, || Folio::alloc());
 
                 page_slice = &page_vec;
             }
         }
@@ -266,7 +265,7 @@ pub enum BlockDeviceRequest<'lt> {
         /// Number of sectors to read
         count: u64,
         /// Buffer pages to read into
-        buffer: &'lt [Page],
+        buffer: &'lt [Folio],
     },
     Write {
         /// Sector to write to, in 512-byte blocks
@@ -274,6 +273,6 @@
         /// Number of sectors to write
         count: u64,
         /// Buffer pages to write from
-        buffer: &'lt [Page],
+        buffer: &'lt [Folio],
     },
 }
diff --git a/src/kernel/mem.rs b/src/kernel/mem.rs
index f8b5dc0b..47b864bb 100644
--- a/src/kernel/mem.rs
+++ b/src/kernel/mem.rs
@@ -3,14 +3,16 @@ pub mod paging;
 mod access;
 mod address;
 mod allocator;
+mod folio;
 mod mm_area;
 mod mm_list;
 mod page_alloc;
 mod page_cache;
 
 pub use access::PhysAccess;
+pub use folio::{Folio, FolioOwned, LockedFolio};
 pub(self) use mm_area::MMArea;
 pub use mm_list::{handle_kernel_page_fault, FileMapping, MMList, Mapping, Permission};
 pub use page_alloc::{GlobalPageAlloc, RawPage};
 pub use page_cache::{CachePage, PageCache, PageOffset};
-pub use paging::{Page, PageBuffer, PageExcl, PageExt};
+pub use paging::PageBuffer;
diff --git a/src/kernel/mem/allocator.rs b/src/kernel/mem/allocator.rs
index a3676ce0..3a70a8c2 100644
--- a/src/kernel/mem/allocator.rs
+++ b/src/kernel/mem/allocator.rs
@@ -3,11 +3,12 @@ use core::ptr::NonNull;
 
 use eonix_hal::mm::ArchPhysAccess;
 use eonix_mm::address::PhysAccess;
-use eonix_mm::paging::{PAGE_SIZE_BITS, PFN};
+use eonix_mm::paging::{Folio as _, PAGE_SIZE_BITS, PFN};
 use eonix_sync::LazyLock;
 use slab_allocator::SlabAlloc;
 
-use super::{GlobalPageAlloc, Page, PageExt};
+use super::folio::Folio;
+use super::GlobalPageAlloc;
 
 static SLAB_ALLOCATOR: LazyLock<SlabAlloc<GlobalPageAlloc>> =
     LazyLock::new(|| SlabAlloc::new_in(GlobalPageAlloc));
@@ -18,19 +19,15 @@ unsafe impl GlobalAlloc for Allocator {
     unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
         let size = layout.size().next_power_of_two();
 
-        let result = if size <= 2048 {
-            SLAB_ALLOCATOR.alloc(size)
+        if size <= 2048 {
+            SLAB_ALLOCATOR.alloc(size).as_ptr()
         } else {
-            let page_count = size >> PAGE_SIZE_BITS;
-            let page = Page::alloc_at_least(page_count);
-
-            let ptr = page.get_ptr();
-            page.into_raw();
+            let folio = Folio::alloc_at_least(size >> PAGE_SIZE_BITS);
+            let ptr = folio.get_ptr();
+            folio.into_raw();
 
-            ptr
-        };
-
-        result.as_ptr()
+            ptr.as_ptr()
+        }
     }
 
     unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
@@ -45,7 +42,8 @@
         } else {
             let paddr = ArchPhysAccess::from_ptr(ptr);
             let pfn = PFN::from(paddr);
-            Page::from_raw(pfn);
+
+            Folio::from_raw(pfn);
         };
     }
 }
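The `GlobalAlloc` above dispatches on the rounded-up size: at most 2048 bytes goes to the slab, anything larger takes whole folios. The page-count arithmetic, spelled out and checked as a standalone sketch (assuming the usual `PAGE_SIZE_BITS` of 12, as in the patch):

    const PAGE_SIZE_BITS: u32 = 12;

    fn folio_pages_for(size: usize) -> usize {
        // Mirrors `Folio::alloc_at_least(size >> PAGE_SIZE_BITS)` after the
        // `next_power_of_two()` rounding in `alloc`.
        size.next_power_of_two() >> PAGE_SIZE_BITS
    }

    fn main() {
        assert_eq!(folio_pages_for(4096), 1);
        assert_eq!(folio_pages_for(5000), 2); // rounds up to 8192
    }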
diff --git a/src/kernel/mem/folio.rs b/src/kernel/mem/folio.rs
new file mode 100644
index 00000000..6647e1af
--- /dev/null
+++ b/src/kernel/mem/folio.rs
@@ -0,0 +1,210 @@
+use core::fmt;
+use core::mem::ManuallyDrop;
+use core::ops::Deref;
+use core::ptr::NonNull;
+use core::sync::atomic::Ordering;
+
+use eonix_mm::paging::{Folio as FolioTrait, FrameAlloc, GlobalFrameAlloc, Zone, PFN};
+
+use super::page_alloc::ZONE;
+use super::{GlobalPageAlloc, PhysAccess as _, RawPage};
+
+#[repr(transparent)]
+pub struct Folio(NonNull<RawPage>);
+
+#[derive(Debug)]
+#[repr(transparent)]
+pub struct FolioOwned(Folio);
+
+#[repr(transparent)]
+pub struct LockedFolio<'a>(&'a Folio);
+
+unsafe impl Send for Folio {}
+unsafe impl Sync for Folio {}
+
+impl Folio {
+    pub(super) const fn from_mut_page(raw_page: &'static mut RawPage) -> Self {
+        Self(NonNull::new(raw_page).unwrap())
+    }
+
+    /// Allocate a folio of the given *order*.
+    pub fn alloc_order(order: u32) -> Self {
+        GlobalPageAlloc::GLOBAL
+            .alloc_order(order)
+            .expect("Out of memory")
+    }
+
+    /// Allocate a folio of order 0.
+    pub fn alloc() -> Self {
+        Self::alloc_order(0)
+    }
+
+    /// Allocate a folio consisting of at least [`count`] pages.
+    pub fn alloc_at_least(count: usize) -> Self {
+        GlobalPageAlloc::GLOBAL
+            .alloc_at_least(count)
+            .expect("Out of memory")
+    }
+
+    /// Acquire the ownership of the folio pointed to by [`pfn`], leaving
+    /// [`refcount`] untouched.
+    ///
+    /// # Panic
+    /// This function will panic if the folio is not within the global zone.
+    ///
+    /// # Safety
+    /// This function is unsafe because it assumes that the caller has ensured
+    /// that [`pfn`] points to a valid folio allocated through [`Self::alloc()`]
+    /// and that the folio has not been freed or deallocated yet.
+    pub unsafe fn from_raw(pfn: PFN) -> Self {
+        unsafe {
+            // SAFETY: The caller ensures that [`pfn`] points to a folio within
+            //         the global zone.
+            Self(ZONE.get_page(pfn).unwrap_unchecked())
+        }
+    }
+
+    /// Do some work with the folio, without touching the reference count,
+    /// under the same restrictions as [`Self::from_raw()`].
+    ///
+    /// # Safety
+    /// Check [`Self::from_raw()`] for safety requirements.
+    pub unsafe fn with_raw<F, O>(pfn: PFN, func: F) -> O
+    where
+        F: FnOnce(&Self) -> O,
+    {
+        unsafe {
+            let me = ManuallyDrop::new(Self::from_raw(pfn));
+            func(&me)
+        }
+    }
+
+    pub fn lock(&self) -> LockedFolio {
+        // TODO: actually perform the lock...
+        LockedFolio(self)
+    }
+
+    /// Get a vmem pointer to the folio data as a byte slice.
+    pub fn get_bytes_ptr(&self) -> NonNull<[u8]> {
+        unsafe {
+            // SAFETY: `self.start()` can't be null.
+            NonNull::slice_from_raw_parts(self.start().as_ptr(), self.len())
+        }
+    }
+
+    /// Get a vmem pointer to the start of the folio.
+    pub fn get_ptr(&self) -> NonNull<u8> {
+        self.get_bytes_ptr().cast()
+    }
+}
+
+impl Deref for Folio {
+    type Target = RawPage;
+
+    fn deref(&self) -> &Self::Target {
+        unsafe {
+            // SAFETY: We don't expose mutable references to the folio.
+            self.0.as_ref()
+        }
+    }
+}
+
+impl Clone for Folio {
+    fn clone(&self) -> Self {
+        // SAFETY: The memory order here can be Relaxed for the same reason as
+        //         in the copy constructor of `std::shared_ptr`.
+        self.refcount.fetch_add(1, Ordering::Relaxed);
+
+        Self(self.0)
+    }
+}
+
+impl Drop for Folio {
+    fn drop(&mut self) {
+        match self.refcount.fetch_sub(1, Ordering::AcqRel) {
+            0 => unreachable!("Refcount for an in-use page is 0"),
+            1 => unsafe { GlobalPageAlloc::GLOBAL.dealloc_raw(self.0.as_mut()) },
+            _ => {}
+        }
+    }
+}
+
+impl fmt::Debug for Folio {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "Page({:?}, order={})", self.pfn(), self.order)
+    }
+}
+
+impl FolioTrait for Folio {
+    fn pfn(&self) -> PFN {
+        ZONE.get_pfn(self.0.as_ptr())
+    }
+
+    fn order(&self) -> u32 {
+        self.order
+    }
+}
+
+impl LockedFolio<'_> {
+    pub fn as_bytes(&self) -> &[u8] {
+        unsafe {
+            // SAFETY: `self.start()` points to valid memory of length `self.len()`.
+            core::slice::from_raw_parts(self.start().as_ptr().as_ptr(), self.len())
+        }
+    }
+
+    pub fn as_bytes_mut(&mut self) -> &mut [u8] {
+        unsafe {
+            // SAFETY: `self.start()` points to valid memory of length `self.len()`.
+            core::slice::from_raw_parts_mut(self.start().as_ptr().as_ptr(), self.len())
+        }
+    }
+}
+
+impl Deref for LockedFolio<'_> {
+    type Target = Folio;
+
+    fn deref(&self) -> &Self::Target {
+        self.0
+    }
+}
+
+impl FolioOwned {
+    pub fn alloc() -> Self {
+        Self(Folio::alloc())
+    }
+
+    pub fn alloc_order(order: u32) -> Self {
+        Self(Folio::alloc_order(order))
+    }
+
+    pub fn alloc_at_least(count: usize) -> Self {
+        Self(Folio::alloc_at_least(count))
+    }
+
+    pub fn as_bytes(&self) -> &[u8] {
+        unsafe {
+            // SAFETY: The page is exclusively owned by us.
+            self.get_bytes_ptr().as_ref()
+        }
+    }
+
+    pub fn as_bytes_mut(&mut self) -> &mut [u8] {
+        unsafe {
+            // SAFETY: The page is exclusively owned by us.
+            self.get_bytes_ptr().as_mut()
+        }
+    }
+
+    pub fn share(self) -> Folio {
+        self.0
+    }
+}
+
+impl Deref for FolioOwned {
+    type Target = Folio;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
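The `Clone`/`Drop` pair in `folio.rs` is the classic shared-pointer protocol: `Relaxed` on increment, `AcqRel` on the final decrement so the last owner observes all writes before freeing. The same shape on a bare counter, as a sketch:

    use std::sync::atomic::{AtomicUsize, Ordering};

    struct Shared {
        refcount: AtomicUsize,
    }

    impl Shared {
        fn grab(&self) {
            // The caller already holds a reference, so no ordering is needed;
            // the increment only has to be atomic.
            self.refcount.fetch_add(1, Ordering::Relaxed);
        }

        fn put(&self) -> bool {
            // AcqRel: the final decrement synchronizes with all earlier ones,
            // so the free that follows cannot observe stale writes.
            self.refcount.fetch_sub(1, Ordering::AcqRel) == 1
        }
    }

    fn main() {
        let s = Shared { refcount: AtomicUsize::new(1) };
        s.grab();
        assert!(!s.put());
        assert!(s.put()); // the last owner would free here
    }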
diff --git a/src/kernel/mem/mm_area.rs b/src/kernel/mem/mm_area.rs
index 2891dad8..782c5ef7 100644
--- a/src/kernel/mem/mm_area.rs
+++ b/src/kernel/mem/mm_area.rs
@@ -1,15 +1,16 @@
 use core::borrow::Borrow;
 use core::cell::UnsafeCell;
 use core::cmp;
+use core::sync::atomic::Ordering;
 
 use eonix_mm::address::{AddrOps as _, VAddr, VRange};
 use eonix_mm::page_table::{PageAttribute, RawAttribute, PTE};
-use eonix_mm::paging::PFN;
+use eonix_mm::paging::{Folio as _, PFN};
 
 use super::mm_list::EMPTY_PAGE;
-use super::{Mapping, Page, Permission};
-use crate::kernel::mem::page_cache::PageOffset;
-use crate::kernel::mem::{CachePage, PageExcl, PageExt};
+use super::{Mapping, Permission};
+use crate::kernel::mem::folio::Folio;
+use crate::kernel::mem::{CachePage, FolioOwned, PageOffset};
 use crate::prelude::KResult;
 
 #[derive(Debug)]
@@ -98,8 +99,10 @@ impl MMArea {
         attr.remove(PageAttribute::COPY_ON_WRITE);
         attr.set(PageAttribute::WRITE, self.permission.write);
 
-        let page = unsafe { Page::from_raw(*pfn) };
-        if page.is_exclusive() {
+        let page = unsafe { Folio::from_raw(*pfn) };
+
+        // XXX: Change me!!!
+        if page.refcount.load(Ordering::Relaxed) == 1 {
             // SAFETY: This is actually safe. If we read `1` here and we have `MMList` lock
             // held, there couldn't be neither other processes sharing the page, nor other
             // threads making the page COW at the same time.
@@ -109,9 +112,13 @@ impl MMArea {
 
         let mut new_page;
         if *pfn == EMPTY_PAGE.pfn() {
-            new_page = PageExcl::zeroed();
+            new_page = {
+                let mut folio = FolioOwned::alloc();
+                folio.as_bytes_mut().fill(0);
+                folio
+            };
         } else {
-            new_page = PageExcl::alloc();
+            new_page = FolioOwned::alloc();
 
             unsafe {
                 // SAFETY: `page` is CoW, which means that others won't write to it.
@@ -123,7 +130,7 @@
         }
 
         attr.remove(PageAttribute::ACCESSED);
-        *pfn = new_page.into_page().into_raw();
+        *pfn = new_page.share().into_raw();
     }
 
     /// # Arguments
@@ -143,11 +150,11 @@
 
         let file_offset = file_mapping.offset + offset;
 
-        let map_page = |page: &Page, cache_page: &CachePage| {
+        let map_page = |cache_page: &CachePage| {
             if !self.permission.write {
                 assert!(!write, "Write fault on read-only mapping");
 
-                *pfn = page.clone().into_raw();
+                *pfn = cache_page.add_mapping();
                 return;
             }
 
@@ -157,26 +164,26 @@
                 // So here we can set the dirty flag now.
                 cache_page.set_dirty(true);
                 attr.insert(PageAttribute::WRITE);
-                *pfn = page.clone().into_raw();
+                *pfn = cache_page.add_mapping();
                 return;
             }
 
             if !write {
                 // Delay the copy-on-write until write fault happens.
                 attr.insert(PageAttribute::COPY_ON_WRITE);
-                *pfn = page.clone().into_raw();
+                *pfn = cache_page.add_mapping();
                 return;
             }
 
             // XXX: Change this. Let's handle mapped pages before CoW pages.
             // Nah, we are writing to a mapped private mapping...
-            let mut new_page = PageExcl::zeroed();
+            let mut new_page = FolioOwned::alloc();
             new_page
                 .as_bytes_mut()
-                .copy_from_slice(page.lock().as_bytes());
+                .copy_from_slice(cache_page.lock().as_bytes());
 
             attr.insert(PageAttribute::WRITE);
-            *pfn = new_page.into_page().into_raw();
+            *pfn = new_page.share().into_raw();
         };
 
         file_mapping
diff --git a/src/kernel/mem/mm_list.rs b/src/kernel/mem/mm_list.rs
index 17dc1b05..5221c73b 100644
--- a/src/kernel/mem/mm_list.rs
+++ b/src/kernel/mem/mm_list.rs
@@ -1,31 +1,33 @@
 mod mapping;
 mod page_fault;
+mod page_table;
 
 use alloc::collections::btree_set::BTreeSet;
 use core::fmt;
 use core::sync::atomic::{AtomicUsize, Ordering};
 
 use eonix_hal::mm::{
-    flush_tlb_all, get_root_page_table_pfn, set_root_page_table_pfn, ArchPagingMode,
-    ArchPhysAccess, GLOBAL_PAGE_TABLE,
+    flush_tlb_all, get_root_page_table_pfn, set_root_page_table_pfn, GLOBAL_PAGE_TABLE,
 };
 use eonix_mm::address::{Addr as _, AddrOps as _, PAddr, VAddr, VRange};
-use eonix_mm::page_table::{PageAttribute, PageTable, RawAttribute, PTE};
-use eonix_mm::paging::{PAGE_SIZE, PFN};
+use eonix_mm::page_table::{PageAttribute, RawAttribute, PTE};
+use eonix_mm::paging::{Folio as _, PAGE_SIZE, PFN};
 use eonix_sync::{LazyLock, Mutex};
 pub use mapping::{FileMapping, Mapping};
 pub use page_fault::handle_kernel_page_fault;
+use page_table::KernelPageTable;
 
 use super::address::{VAddrExt as _, VRangeExt as _};
-use super::page_alloc::GlobalPageAlloc;
-use super::paging::AllocZeroed as _;
-use super::{MMArea, Page, PageExt};
+use super::{Folio, FolioOwned, MMArea};
 use crate::kernel::constants::{EEXIST, EFAULT, EINVAL, ENOMEM};
-use crate::kernel::mem::page_alloc::RawPagePtr;
 use crate::prelude::*;
 use crate::sync::ArcSwap;
 
-pub static EMPTY_PAGE: LazyLock<Page> = LazyLock::new(|| Page::zeroed());
+pub static EMPTY_PAGE: LazyLock<Folio> = LazyLock::new(|| {
+    let mut folio = FolioOwned::alloc();
+    folio.as_bytes_mut().fill(0);
+    folio.share()
+});
 
 #[derive(Debug, Clone, Copy)]
 pub struct Permission {
@@ -34,23 +36,21 @@ pub struct Permission {
     pub execute: bool,
 }
 
-pub type KernelPageTable<'a> = PageTable<'a, ArchPagingMode, GlobalPageAlloc, ArchPhysAccess>;
-
-struct MMListInner<'a> {
+struct MMListInner {
     areas: BTreeSet<MMArea>,
-    page_table: KernelPageTable<'a>,
+    page_table: KernelPageTable,
     break_start: Option,
     break_pos: Option,
 }
 
 pub struct MMList {
-    inner: ArcSwap<Mutex<MMListInner<'static>>>,
+    inner: ArcSwap<Mutex<MMListInner>>,
     user_count: AtomicUsize,
     /// Only used in kernel space to switch page tables on context switch.
     root_page_table: AtomicUsize,
 }
 
-impl MMListInner<'_> {
+impl MMListInner {
     fn overlapping_addr(&self, addr: VAddr) -> Option<&MMArea> {
         self.areas.get(&VRange::from(addr))
     }
@@ -96,7 +96,7 @@
         }
     }
 
-    fn unmap(&mut self, start: VAddr, len: usize) -> KResult<Vec<Page>> {
+    fn unmap(&mut self, start: VAddr, len: usize) -> KResult<Vec<Folio>> {
         assert_eq!(start.floor(), start);
         let end = (start + len).ceil();
         let range_to_unmap = VRange::new(start, end);
@@ -120,7 +120,7 @@
                 let (pfn, _) = pte.take();
                 pages_to_free.push(unsafe {
                     // SAFETY: We got the pfn from a valid page table entry, so it should be valid.
-                    Page::from_raw(pfn)
+                    Folio::from_raw(pfn)
                 });
             }
 
@@ -275,23 +275,23 @@
     }
 }
 
-impl Drop for MMListInner<'_> {
+impl Drop for MMListInner {
     fn drop(&mut self) {
         // May buggy
         for area in &self.areas {
             if area.is_shared {
                 for pte in self.page_table.iter_user(area.range()) {
-                    let (pfn, _) = pte.take();
-                    let raw_page = RawPagePtr::from(pfn);
-                    if raw_page.refcount().fetch_sub(1, Ordering::Relaxed) == 1 {
-                        // Wrong here
-                        // unsafe { Page::from_raw(pfn) };
-                    }
+                    // XXX: Fix me
+                    let _ = pte.take();
+                    // let raw_page = RawPagePtr::from(pfn);
+                    // if raw_page.refcount().fetch_sub(1, Ordering::Relaxed) == 1 {
+                    //     unsafe { Page::from_raw(pfn) };
+                    // }
                 }
             } else {
                 for pte in self.page_table.iter_user(area.range()) {
                     let (pfn, _) = pte.take();
-                    unsafe { Page::from_raw(pfn) };
+                    unsafe { Folio::from_raw(pfn) };
                 }
             }
         }
@@ -327,7 +327,7 @@
     }
 
     pub fn new() -> Self {
-        let page_table = GLOBAL_PAGE_TABLE.clone_global();
+        let page_table = KernelPageTable::new();
         Self {
             root_page_table: AtomicUsize::from(page_table.addr().addr()),
             user_count: AtomicUsize::new(0),
@@ -344,7 +344,7 @@
         let inner = self.inner.borrow();
         let mut inner = inner.lock().await;
 
-        let page_table = GLOBAL_PAGE_TABLE.clone_global();
+        let page_table = KernelPageTable::new();
         let list = Self {
             root_page_table: AtomicUsize::from(page_table.addr().addr()),
             user_count: AtomicUsize::new(0),
@@ -392,7 +392,7 @@
     }
 
     pub fn deactivate(&self) {
-        set_root_page_table_pfn(PFN::from(GLOBAL_PAGE_TABLE.addr()));
+        set_root_page_table_pfn(PFN::from(GLOBAL_PAGE_TABLE.start()));
 
         let old_user_count = self.user_count.fetch_sub(1, Ordering::Release);
         assert_ne!(old_user_count, 0);
@@ -444,7 +444,7 @@
 
         let new_root_page_table = match &new {
             Some(new_mm) => new_mm.root_page_table.load(Ordering::Relaxed),
-            None => GLOBAL_PAGE_TABLE.addr().addr(),
+            None => GLOBAL_PAGE_TABLE.start().addr(),
         };
 
         set_root_page_table_pfn(PFN::from(PAddr::from(new_root_page_table)));
@@ -693,7 +693,7 @@
 
                 unsafe {
                     // SAFETY: We are sure that the page is valid and we have the right to access it.
-                    Page::with_raw(pte.get_pfn(), |page| {
+                    Folio::with_raw(pte.get_pfn(), |page| {
                         let mut pg = page.lock();
                         let page_data = &mut pg.as_bytes_mut()[start_offset..end_offset];
 
@@ -724,7 +724,7 @@
     fn set_copied(&self, from: &Self, range: VRange);
 }
 
-impl PageTableExt for KernelPageTable<'_> {
+impl PageTableExt for KernelPageTable {
     fn set_anonymous(&self, range: VRange, permission: Permission) {
         for pte in self.iter_user(range) {
             pte.set_anonymous(permission.execute);
@@ -805,7 +805,7 @@
 
         let pfn = unsafe {
             // SAFETY: We get the pfn from a valid page table entry, so it should be valid as well.
-            Page::with_raw(from.get_pfn(), |page| page.clone().into_raw())
+            Folio::with_raw(from.get_pfn(), |page| page.clone().into_raw())
         };
 
         self.set(pfn, T::Attr::from(from_attr & !PageAttribute::ACCESSED));
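`EMPTY_PAGE` above is the shared zero page: anonymous read faults map it, and the copy-on-write path in `mm_area.rs` only allocates a private folio on the first write. A userspace-flavored sketch of that decision, modelling the patch's logic rather than reusing its types:

    fn resolve_write_fault(mapped_zero_page: bool, shared: &[u8; 4096]) -> Box<[u8; 4096]> {
        if mapped_zero_page {
            // Writing through the zero page: a fresh zeroed folio is enough.
            Box::new([0u8; 4096])
        } else {
            // Writing to a shared CoW folio: copy the old contents first.
            Box::new(*shared)
        }
    }

    fn main() {
        static ZERO: [u8; 4096] = [0u8; 4096];
        let mut private = resolve_write_fault(false, &ZERO);
        private[0] = 42;
    }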
diff --git a/src/kernel/mem/mm_list/page_table.rs b/src/kernel/mem/mm_list/page_table.rs
new file mode 100644
index 00000000..8a2acc13
--- /dev/null
+++ b/src/kernel/mem/mm_list/page_table.rs
@@ -0,0 +1,40 @@
+use core::ops::Deref;
+
+use eonix_hal::arch_exported::mm::{ArchPagingMode, PageAccessImpl};
+use eonix_hal::mm::GLOBAL_PAGE_TABLE;
+use eonix_mm::page_table::PageTable;
+use eonix_mm::paging::{Folio, GlobalFrameAlloc};
+
+use crate::kernel::mem::{FolioOwned, GlobalPageAlloc, PhysAccess};
+
+#[repr(transparent)]
+pub struct KernelPageTable(PageTable<'static, ArchPagingMode, GlobalPageAlloc, PageAccessImpl>);
+
+impl KernelPageTable {
+    pub fn new() -> Self {
+        let global_page_table = unsafe {
+            // SAFETY: The region is valid and read only after initialization.
+            GLOBAL_PAGE_TABLE.start().as_ptr::<[u8; 4096]>().as_ref()
+        };
+
+        let mut table_page = FolioOwned::alloc();
+        let entries = table_page.as_bytes_mut().len();
+        table_page.as_bytes_mut()[..(entries / 2)].fill(0);
+        table_page.as_bytes_mut()[(entries / 2)..]
+            .copy_from_slice(&global_page_table[(entries / 2)..]);
+
+        Self(PageTable::new(
+            table_page.share(),
+            GlobalPageAlloc::GLOBAL,
+            PageAccessImpl,
+        ))
+    }
+}
+
+impl Deref for KernelPageTable {
+    type Target = PageTable<'static, ArchPagingMode, GlobalPageAlloc, PageAccessImpl>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
diff --git a/src/kernel/mem/page_alloc.rs b/src/kernel/mem/page_alloc.rs
index 37344fc2..ac2485da 100644
--- a/src/kernel/mem/page_alloc.rs
+++ b/src/kernel/mem/page_alloc.rs
@@ -5,14 +5,14 @@
 
 use buddy_allocator::BuddyAllocator;
 use eonix_mm::address::PRange;
-use eonix_mm::paging::{
-    GlobalPageAlloc as GlobalPageAllocTrait, PageAlloc, PageList, PageListSized as _,
-};
+use eonix_mm::page_table::PageTableAlloc;
+use eonix_mm::paging::{FolioList, FolioListSized as _, FrameAlloc, GlobalFrameAlloc, PFN};
 use eonix_preempt::PreemptGuard;
 use eonix_sync::{NoContext, Spin};
-use raw_page::{PageFlags, RawPageList};
-pub use raw_page::{RawPage, RawPagePtr};
-pub use zones::GlobalZone;
+pub use raw_page::{PageFlags, RawPage, RawPageList};
+pub use zones::{GlobalZone, ZONE};
+
+use super::folio::Folio;
 
 const COSTLY_ORDER: u32 = 3;
 const AREAS: usize = COSTLY_ORDER as usize + 1;
@@ -27,9 +27,6 @@ static PERCPU_PAGE_ALLOC: PerCpuPageAlloc = PerCpuPageAlloc::new();
 #[derive(Clone)]
 pub struct GlobalPageAlloc;
 
-#[derive(Clone)]
-pub struct BuddyPageAlloc();
-
 struct PerCpuPageAlloc {
     batch: u32,
     free_areas: [RawPageList; AREAS],
@@ -72,11 +69,6 @@ impl PerCpuPageAlloc {
 }
 
 impl GlobalPageAlloc {
-    #[allow(dead_code)]
-    pub const fn buddy_alloc() -> BuddyPageAlloc {
-        BuddyPageAlloc()
-    }
-
     /// Add the pages in the PAddr range `range` to the global allocator.
     ///
     /// This function is only to be called on system initialization when `eonix_preempt`
@@ -88,15 +80,11 @@
     pub unsafe fn add_pages(range: PRange) {
         BUDDY_ALLOC
             .lock_with_context(NoContext)
-            .create_pages(range.start(), range.end())
+            .create_folios(range.start(), range.end())
     }
-}
-
-impl PageAlloc for GlobalPageAlloc {
-    type RawPage = RawPagePtr;
 
-    fn alloc_order(&self, order: u32) -> Option<Self::RawPage> {
-        let raw_page = if order > COSTLY_ORDER {
+    pub fn alloc_raw_order(&self, order: u32) -> Option<&'static mut RawPage> {
+        if order > COSTLY_ORDER {
             BUDDY_ALLOC.lock().alloc_order(order)
         } else {
             unsafe {
@@ -106,61 +94,54 @@
                 page
             }
-        };
-
-        raw_page.map(|raw_page| {
-            // SAFETY: Memory order here can be Relaxed is for the same reason
-            // as that in the copy constructor of `std::shared_ptr`.
-            raw_page.refcount.fetch_add(1, Ordering::Relaxed);
-
-            RawPagePtr::from_ref(raw_page)
-        })
+        }
     }
 
-    unsafe fn dealloc(&self, page_ptr: RawPagePtr) {
+    pub unsafe fn dealloc_raw(&self, raw_page: &'static mut RawPage) {
         assert_eq!(
-            page_ptr.refcount().load(Ordering::Relaxed),
+            raw_page.refcount.load(Ordering::Relaxed),
             0,
             "Trying to free a page with refcount > 0"
         );
 
-        if page_ptr.order() > COSTLY_ORDER {
-            BUDDY_ALLOC.lock().dealloc(page_ptr.as_mut());
+        if raw_page.order > COSTLY_ORDER {
+            BUDDY_ALLOC.lock().dealloc(raw_page);
         } else {
-            let order = page_ptr.order();
+            let order = raw_page.order;
             unsafe {
-                PreemptGuard::new(PERCPU_PAGE_ALLOC.as_mut()).free_pages(page_ptr.as_mut(), order);
+                PreemptGuard::new(PERCPU_PAGE_ALLOC.as_mut()).free_pages(raw_page, order);
             }
         }
     }
-
-    fn has_management_over(&self, page_ptr: RawPagePtr) -> bool {
-        page_ptr.order() > COSTLY_ORDER || page_ptr.flags().has(PageFlags::LOCAL)
-    }
 }
 
-impl GlobalPageAllocTrait for GlobalPageAlloc {
-    fn global() -> Self {
-        GlobalPageAlloc
+impl FrameAlloc for GlobalPageAlloc {
+    type Folio = Folio;
+
+    fn alloc_order(&self, order: u32) -> Option<Self::Folio> {
+        self.alloc_raw_order(order).map(|raw_page| {
+            // SAFETY: The memory order here can be Relaxed for the same reason
+            //         as in the copy constructor of `std::shared_ptr`.
+
+            raw_page.refcount.fetch_add(1, Ordering::Relaxed);
+            Folio::from_mut_page(raw_page)
+        })
     }
 }
 
-impl PageAlloc for BuddyPageAlloc {
-    type RawPage = RawPagePtr;
+impl GlobalFrameAlloc for GlobalPageAlloc {
+    const GLOBAL: Self = GlobalPageAlloc;
+}
 
-    fn alloc_order(&self, order: u32) -> Option<Self::RawPage> {
-        BUDDY_ALLOC
-            .lock()
-            .alloc_order(order)
-            .map(|raw_page| RawPagePtr::from_ref(raw_page))
-    }
+impl PageTableAlloc for GlobalPageAlloc {
+    type Folio = Folio;
 
-    unsafe fn dealloc(&self, page_ptr: RawPagePtr) {
-        BUDDY_ALLOC.lock().dealloc(page_ptr.as_mut());
+    fn alloc(&self) -> Self::Folio {
+        FrameAlloc::alloc(self).unwrap()
    }
 
-    fn has_management_over(&self, _: RawPagePtr) -> bool {
-        true
+    unsafe fn from_raw(&self, pfn: PFN) -> Self::Folio {
+        unsafe { Folio::from_raw(pfn) }
     }
 }
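`alloc_raw_order` above splits on `COSTLY_ORDER`: cheap orders come from per-CPU lists guarded by disabling preemption, costly ones fall back to the locked buddy allocator. The dispatch shape only, as a sketch:

    const COSTLY_ORDER: u32 = 3;

    #[derive(Debug, PartialEq)]
    enum Path {
        PerCpu, // per-CPU free lists; preemption disabled instead of a lock
        Buddy,  // the shared buddy allocator, behind a spinlock
    }

    fn alloc_path(order: u32) -> Path {
        if order > COSTLY_ORDER { Path::Buddy } else { Path::PerCpu }
    }

    fn main() {
        assert_eq!(alloc_path(0), Path::PerCpu);
        assert_eq!(alloc_path(7), Path::Buddy); // e.g. a 128-page kernel stack
    }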
diff --git a/src/kernel/mem/page_alloc/raw_page.rs b/src/kernel/mem/page_alloc/raw_page.rs
index 074f82c7..0d775245 100644
--- a/src/kernel/mem/page_alloc/raw_page.rs
+++ b/src/kernel/mem/page_alloc/raw_page.rs
@@ -1,20 +1,17 @@
 use core::ptr::NonNull;
 use core::sync::atomic::{AtomicU32, AtomicUsize, Ordering};
 
-use buddy_allocator::BuddyPage;
+use buddy_allocator::BuddyFolio;
 use eonix_hal::mm::ArchPhysAccess;
 use eonix_mm::address::{PAddr, PhysAccess as _};
-use eonix_mm::paging::{PageAlloc, PageList, PageListSized, RawPage as RawPageTrait, PFN};
+use eonix_mm::paging::{FolioList, FolioListSized, Zone, PFN};
 use intrusive_list::{container_of, Link, List};
 use slab_allocator::{SlabPage, SlabPageAlloc, SlabSlot};
 
+use super::zones::ZONE;
 use super::{GlobalPageAlloc, PerCpuPage};
-use crate::kernel::mem::page_cache::PageCacheRawPage;
 use crate::kernel::mem::PhysAccess;
 
-pub const PAGE_ARRAY: NonNull<RawPage> =
-    unsafe { NonNull::new_unchecked(0xffffff8040000000 as *mut _) };
-
 pub struct PageFlags(AtomicU32);
 
 #[derive(Clone, Copy)]
@@ -41,11 +38,11 @@ pub struct RawPage {
     /// This can be used for LRU page swap in the future.
     ///
     /// Now only used for free page links in the buddy system.
-    link: Link,
+    pub link: Link,
 
     /// # Safety
     /// This field is only used in buddy system and is protected by the global lock.
-    order: u32,
-    flags: PageFlags,
+    pub order: u32,
+    pub flags: PageFlags,
     pub refcount: AtomicUsize,
 
     shared_data: PageData,
@@ -55,9 +52,6 @@
 unsafe impl Send for RawPage {}
 unsafe impl Sync for RawPage {}
 
-#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
-pub struct RawPagePtr(NonNull<RawPage>);
-
 impl PageFlags {
     pub const LOCKED: u32 = 1 << 1;
     pub const BUDDY: u32 = 1 << 2;
@@ -85,80 +79,9 @@
     }
 }
 
-impl RawPagePtr {
-    pub const fn from_ref(raw_page_ref: &RawPage) -> Self {
-        Self::new(unsafe {
-            // SAFETY: Rust references always points to non-null addresses.
-            NonNull::new_unchecked(&raw const *raw_page_ref as *mut _)
-        })
-    }
-
-    pub const fn new(ptr: NonNull<RawPage>) -> Self {
-        Self(ptr)
-    }
-
-    /// Get a raw pointer to the underlying `RawPage` struct.
-    ///
-    /// # Safety
-    /// Doing arithmetic on the pointer returned will cause immediate undefined behavior.
-    pub const unsafe fn as_ptr(self) -> *mut RawPage {
-        self.0.as_ptr()
-    }
-
-    pub const fn as_ref<'a>(self) -> &'a RawPage {
-        unsafe { &*self.as_ptr() }
-    }
-
-    pub const fn as_mut<'a>(self) -> &'a mut RawPage {
-        unsafe { &mut *self.as_ptr() }
-    }
-
-    pub const fn order(&self) -> u32 {
-        self.as_ref().order
-    }
-
-    pub const fn flags(&self) -> &PageFlags {
-        &self.as_ref().flags
-    }
-
-    pub const fn refcount(&self) -> &AtomicUsize {
-        &self.as_ref().refcount
-    }
-
-    // return the ptr point to the actually raw page
-    pub fn real_ptr(&self) -> NonNull<u8> {
-        let pfn = unsafe { PFN::from(RawPagePtr(NonNull::new_unchecked(self.as_ptr()))) };
-        unsafe { PAddr::from(pfn).as_ptr() }
-    }
-}
-
-impl From<RawPagePtr> for PFN {
-    fn from(value: RawPagePtr) -> Self {
-        let idx = unsafe { value.as_ptr().offset_from(PAGE_ARRAY.as_ptr()) as usize };
-        Self::from(idx)
-    }
-}
-
-impl From<PFN> for RawPagePtr {
-    fn from(pfn: PFN) -> Self {
-        let raw_page_ptr = unsafe { PAGE_ARRAY.add(usize::from(pfn)) };
-        Self::new(raw_page_ptr)
-    }
-}
-
-impl RawPageTrait for RawPagePtr {
-    fn order(&self) -> u32 {
-        self.order()
-    }
-
-    fn refcount(&self) -> &AtomicUsize {
-        self.refcount()
-    }
-}
-
-impl BuddyPage for RawPage {
+impl BuddyFolio for RawPage {
     fn pfn(&self) -> PFN {
-        PFN::from(RawPagePtr::from_ref(self))
+        ZONE.get_pfn(self)
     }
 
     fn get_order(&self) -> u32 {
@@ -184,8 +107,7 @@
 
 impl SlabPage for RawPage {
     fn get_data_ptr(&self) -> NonNull<[u8]> {
-        let raw_page_ptr = RawPagePtr::from_ref(self);
-        let paddr_start = PAddr::from(PFN::from(raw_page_ptr));
+        let paddr_start = PAddr::from(ZONE.get_pfn(self));
         let page_data_ptr = unsafe { paddr_start.as_ptr() };
 
         NonNull::slice_from_raw_parts(page_data_ptr, 1 << (self.order + 12))
@@ -233,21 +155,9 @@
             let paddr = ArchPhysAccess::from_ptr(ptr);
             let pfn = PFN::from(paddr);
 
-            RawPagePtr::from(pfn).as_mut()
-        }
-    }
-}
-
-impl PageCacheRawPage for RawPagePtr {
-    fn is_dirty(&self) -> bool {
-        self.flags().has(PageFlags::DIRTY)
-    }
-
-    fn set_dirty(&self, dirty: bool) {
-        if dirty {
-            self.flags().set(PageFlags::DIRTY);
-        } else {
-            self.flags().clear(PageFlags::DIRTY);
+            ZONE.get_page(pfn)
+                .expect("Page outside of the global zone")
+                .as_mut()
         }
     }
 }
@@ -264,14 +174,16 @@ impl PerCpuPage for RawPage {
 
 pub struct RawPageList(List);
 
-impl PageList for RawPageList {
-    type Page = RawPage;
+unsafe impl Send for RawPageList {}
+
+impl FolioList for RawPageList {
+    type Folio = RawPage;
 
     fn is_empty(&self) -> bool {
         self.0.is_empty()
     }
 
-    fn peek_head(&mut self) -> Option<&mut Self::Page> {
+    fn peek_head(&mut self) -> Option<&mut Self::Folio> {
         unsafe {
             let link = self.0.head()?;
             let mut raw_page_ptr = container_of!(link, RawPage, link);
@@ -280,7 +192,7 @@
         }
     }
 
-    fn pop_head(&mut self) -> Option<&'static mut Self::Page> {
+    fn pop_head(&mut self) -> Option<&'static mut Self::Folio> {
         unsafe {
             let link = self.0.pop()?;
             let mut raw_page_ptr = container_of!(link, RawPage, link);
@@ -289,25 +201,25 @@
         }
     }
 
-    fn push_tail(&mut self, page: &'static mut Self::Page) {
+    fn push_tail(&mut self, page: &'static mut Self::Folio) {
         self.0.insert(&mut page.link);
     }
 
-    fn remove(&mut self, page: &mut Self::Page) {
+    fn remove(&mut self, page: &mut Self::Folio) {
         self.0.remove(&mut page.link)
     }
 }
 
-impl PageListSized for RawPageList {
+impl FolioListSized for RawPageList {
     const NEW: Self = RawPageList(List::new());
 }
 
-impl SlabPageAlloc for GlobalPageAlloc {
+unsafe impl SlabPageAlloc for GlobalPageAlloc {
     type Page = RawPage;
     type PageList = RawPageList;
 
-    unsafe fn alloc_uninit(&self) -> &'static mut RawPage {
-        let raw_page = self.alloc().expect("Out of memory").as_mut();
+    fn alloc_slab_page(&self) -> &'static mut RawPage {
+        let raw_page = self.alloc_raw_order(0).expect("Out of memory");
 
         raw_page.flags.set(PageFlags::SLAB);
         raw_page.shared_data.slab = SlabPageData::new();
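`FolioList` for `RawPageList` recovers the owning `RawPage` from its embedded `link` with `container_of!`. The same pointer arithmetic in plain Rust using `core::mem::offset_of!`; a sketch of the macro's contract, not its implementation:

    use std::mem::offset_of;
    use std::ptr::NonNull;

    struct Link { next: Option<NonNull<Link>> }
    struct RawPage { order: u32, link: Link }

    /// # Safety
    /// `link` must point at the `link` field of a live `RawPage`.
    unsafe fn page_of(link: *mut Link) -> *mut RawPage {
        unsafe { link.byte_sub(offset_of!(RawPage, link)) }.cast()
    }

    fn main() {
        let mut page = RawPage { order: 3, link: Link { next: None } };
        let recovered = unsafe { page_of(&mut page.link) };
        assert_eq!(unsafe { (*recovered).order }, 3);
    }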
diff --git a/src/kernel/mem/page_alloc/zones.rs b/src/kernel/mem/page_alloc/zones.rs
index 7a2e4e33..032b9cd0 100644
--- a/src/kernel/mem/page_alloc/zones.rs
+++ b/src/kernel/mem/page_alloc/zones.rs
@@ -1,13 +1,23 @@
-use core::cell::UnsafeCell;
+use core::ptr::NonNull;
 
 use eonix_mm::address::PRange;
 use eonix_mm::paging::{Zone, PFN};
 
 use super::RawPage;
-use crate::kernel::mem::page_alloc::RawPagePtr;
+
+pub static ZONE: GlobalZone = GlobalZone();
+
+const PAGE_ARRAY: NonNull<RawPage> =
+    unsafe { NonNull::new_unchecked(0xffffff8040000000 as *mut _) };
 
 pub struct GlobalZone();
 
+impl GlobalZone {
+    pub fn get_pfn(&self, page_ptr: *const RawPage) -> PFN {
+        PFN::from(unsafe { page_ptr.offset_from(PAGE_ARRAY.as_ptr()) as usize })
+    }
+}
+
 impl Zone for GlobalZone {
     type Page = RawPage;
 
@@ -15,11 +25,7 @@ impl Zone for GlobalZone {
         true
     }
 
-    fn get_page(&self, pfn: PFN) -> Option<&UnsafeCell<Self::Page>> {
-        unsafe {
-            // SAFETY: The pointer returned by [`RawPagePtr::as_ptr()`] is valid.
-            // And so is it wrapped with [`UnsafeCell`]
-            Some(&*(RawPagePtr::from(pfn).as_ptr() as *const UnsafeCell<Self::Page>))
-        }
+    fn get_page(&self, pfn: PFN) -> Option<NonNull<Self::Page>> {
+        Some(unsafe { PAGE_ARRAY.add(usize::from(pfn)) })
     }
 }
diff --git a/src/kernel/mem/page_cache.rs b/src/kernel/mem/page_cache.rs
index 214c65a5..3fe33d5b 100644
--- a/src/kernel/mem/page_cache.rs
+++ b/src/kernel/mem/page_cache.rs
@@ -1,19 +1,16 @@
 use alloc::collections::btree_map::{BTreeMap, Entry};
 use core::future::Future;
-use core::mem::ManuallyDrop;
+use core::ops::{Deref, DerefMut};
 
-use eonix_hal::mm::ArchPhysAccess;
-use eonix_mm::address::{PAddr, PhysAccess};
-use eonix_mm::paging::{PageAlloc, RawPage, PAGE_SIZE, PAGE_SIZE_BITS, PFN};
+use eonix_mm::paging::{Folio as _, PAGE_SIZE, PAGE_SIZE_BITS, PFN};
 use eonix_sync::Mutex;
 
-use super::Page;
+use super::page_alloc::PageFlags;
+use super::{Folio, FolioOwned};
 use crate::io::{Buffer, Stream};
 use crate::kernel::constants::EINVAL;
-use crate::kernel::mem::page_alloc::RawPagePtr;
 use crate::kernel::vfs::inode::InodeUse;
 use crate::prelude::KResult;
-use crate::GlobalPageAlloc;
 
 #[repr(transparent)]
 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
@@ -24,14 +21,7 @@ pub struct PageCache {
     inode: InodeUse,
 }
 
-unsafe impl Send for PageCache {}
-unsafe impl Sync for PageCache {}
-
-#[derive(Clone, Copy)]
-pub struct CachePage(RawPagePtr);
-
-unsafe impl Send for CachePage {}
-unsafe impl Sync for CachePage {}
+pub struct CachePage(Folio);
 
 impl PageOffset {
     pub const fn from_byte_floor(offset: usize) -> Self {
@@ -57,39 +47,47 @@
 impl CachePage {
     pub fn new() -> Self {
-        Self(GlobalPageAlloc.alloc().unwrap())
+        CachePage(Folio::alloc())
     }
 
-    pub fn as_bytes(&self) -> &[u8] {
-        unsafe {
-            core::slice::from_raw_parts(
-                // SAFETY: The page is owned by us, so we can safely access its data.
-                ArchPhysAccess::as_ptr(PAddr::from(PFN::from(self.0))).as_ptr(),
-                PAGE_SIZE,
-            )
-        }
-    }
+    pub fn new_zeroed() -> Self {
+        CachePage({
+            let mut folio = FolioOwned::alloc();
+            folio.as_bytes_mut().fill(0);
 
-    pub fn as_bytes_mut(&mut self) -> &mut [u8] {
-        unsafe {
-            core::slice::from_raw_parts_mut(
-                // SAFETY: The page is exclusively owned by us, so we can safely access its data.
-                ArchPhysAccess::as_ptr(PAddr::from(PFN::from(self.0))).as_ptr(),
-                PAGE_SIZE,
-            )
-        }
+            folio.share()
+        })
     }
 
     pub fn is_dirty(&self) -> bool {
-        self.0.is_dirty()
+        self.flags.has(PageFlags::DIRTY)
     }
 
     pub fn set_dirty(&self, dirty: bool) {
-        self.0.set_dirty(dirty);
+        if dirty {
+            self.flags.set(PageFlags::DIRTY);
+        } else {
+            self.flags.clear(PageFlags::DIRTY);
+        }
+    }
+
+    pub fn add_mapping(&self) -> PFN {
+        // TODO: Increase map_count
+        self.0.clone().into_raw()
     }
+}
+
+impl Deref for CachePage {
+    type Target = Folio;
 
-    pub fn get_page(&self) -> Page {
-        unsafe { Page::with_raw(PFN::from(self.0), |page| page.clone()) }
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl DerefMut for CachePage {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.0
     }
 }
 
@@ -124,11 +122,7 @@
     }
 
     // TODO: Remove this.
-    pub async fn with_page(
-        &self,
-        pgoff: PageOffset,
-        func: impl FnOnce(&Page, &CachePage),
-    ) -> KResult<()> {
+    pub async fn with_page(&self, pgoff: PageOffset, func: impl FnOnce(&CachePage)) -> KResult<()> {
         let mut pages = self.pages.lock().await;
         if pgoff > PageOffset::from_byte_ceil(self.len()) {
             return Err(EINVAL);
@@ -136,11 +130,7 @@
         }
 
         let cache_page = self.get_page_locked(&mut pages, pgoff).await?;
 
-        unsafe {
-            let page = ManuallyDrop::new(Page::from_raw_unchecked(PFN::from(cache_page.0)));
-
-            func(&page, cache_page);
-        }
+        func(cache_page);
 
         Ok(())
     }
@@ -166,7 +156,7 @@
             let data_len = real_end - offset;
 
             if buffer
-                .fill(&page.as_bytes()[inner_offset..inner_offset + data_len])?
+                .fill(&page.lock().as_bytes()[inner_offset..inner_offset + data_len])?
                 .should_stop()
                 || buffer.available() == 0
             {
@@ -195,7 +185,7 @@
             let inner_offset = offset % PAGE_SIZE;
 
             let written = stream
-                .poll_data(&mut page.as_bytes_mut()[inner_offset..])?
+                .poll_data(&mut page.lock().as_bytes_mut()[inner_offset..])?
                 .map(|b| b.len())
                 .unwrap_or(0);
 
@@ -237,14 +227,9 @@
     }
 }
 
-pub trait PageCacheRawPage: RawPage {
-    fn is_dirty(&self) -> bool;
-    fn set_dirty(&self, dirty: bool);
-}
-
 impl Drop for PageCache {
     fn drop(&mut self) {
-        // TODO: Write back dirty pages...
-        // let _ = self.fsync();
+        // XXX: Send the PageCache to some flusher worker.
+        let _ = self.fsync();
     }
 }
- core::slice::from_raw_parts_mut(self.start().as_ptr().as_ptr(), self.len()) - } - } -} - -impl Deref for PageLocked<'_> { - type Target = Page; - - fn deref(&self) -> &Self::Target { - self.page - } -} - -impl PageExcl { - pub fn alloc() -> Self { - Self(Page::alloc()) - } - - pub fn alloc_order(order: u32) -> Self { - Self(Page::alloc_order(order)) - } - - pub fn zeroed() -> Self { - Self(Page::zeroed()) - } - - pub fn as_bytes(&self) -> &[u8] { - unsafe { - // SAFETY: The page is exclusively owned by us. - self.get_bytes_ptr().as_ref() - } - } - - pub fn as_bytes_mut(&mut self) -> &mut [u8] { - unsafe { - // SAFETY: The page is exclusively owned by us. - self.get_bytes_ptr().as_mut() - } - } - - pub fn into_page(self) -> Page { - self.0 - } -} - -impl Deref for PageExcl { - type Target = Page; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} diff --git a/src/kernel/task/kernel_stack.rs b/src/kernel/task/kernel_stack.rs index d3e9de2f..f00b91bd 100644 --- a/src/kernel/task/kernel_stack.rs +++ b/src/kernel/task/kernel_stack.rs @@ -1,11 +1,12 @@ -use crate::kernel::mem::{paging::Page, PhysAccess as _}; -use core::{num::NonZero, ptr::NonNull}; +use core::ptr::NonNull; + use eonix_runtime::executor::Stack; +use crate::kernel::mem::FolioOwned; + #[derive(Debug)] pub struct KernelStack { - _pages: Page, - bottom: NonZero, + folio: FolioOwned, } impl KernelStack { @@ -14,15 +15,8 @@ impl KernelStack { const KERNEL_STACK_ORDER: u32 = 7; pub fn new() -> Self { - let pages = Page::alloc_order(Self::KERNEL_STACK_ORDER); - let bottom = unsafe { - // SAFETY: The paddr is from a page, which should be valid. - pages.range().end().as_ptr::().addr() - }; - Self { - _pages: pages, - bottom, + folio: FolioOwned::alloc_order(Self::KERNEL_STACK_ORDER), } } } @@ -33,7 +27,10 @@ impl Stack for KernelStack { } fn get_bottom(&self) -> NonNull<()> { - // SAFETY: The stack is allocated and `bottom` is non-zero. - unsafe { NonNull::new_unchecked(self.bottom.get() as *mut _) } + let ptr = self.folio.get_bytes_ptr(); + let len = ptr.len(); + + // SAFETY: The vaddr of the folio is guaranteed to be non-zero. 
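+        // The bottom returned here is one past the end of the folio
+        // (base + len): the stack spans 2^KERNEL_STACK_ORDER = 128 pages
+        // (512 KiB) and grows downward.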
+ unsafe { ptr.cast().byte_add(len) } } } diff --git a/src/kernel/vfs/file/mod.rs b/src/kernel/vfs/file/mod.rs index eb00cc4c..799b9848 100644 --- a/src/kernel/vfs/file/mod.rs +++ b/src/kernel/vfs/file/mod.rs @@ -15,7 +15,7 @@ pub use terminal_file::TerminalFile; use crate::io::{Buffer, ByteBuffer, Chunks, IntoStream, Stream}; use crate::kernel::constants::{EBADF, EINTR, EINVAL, ENOTTY}; -use crate::kernel::mem::PageExcl; +use crate::kernel::mem::FolioOwned; use crate::kernel::task::Thread; use crate::kernel::CharDevice; use crate::prelude::KResult; @@ -94,7 +94,7 @@ impl FileType { } pub async fn sendfile(&self, dest_file: &Self, count: usize) -> KResult { - let mut buffer_page = PageExcl::alloc(); + let mut buffer_page = FolioOwned::alloc(); let buffer = buffer_page.as_bytes_mut(); self.sendfile_check()?; diff --git a/src/kernel_init.rs b/src/kernel_init.rs index 93b6da20..65af41e4 100644 --- a/src/kernel_init.rs +++ b/src/kernel_init.rs @@ -1,32 +1,26 @@ +use eonix_hal::arch_exported::mm::{ArchPagingMode, PageAccessImpl}; use eonix_hal::bootstrap::BootStrapData; -use eonix_hal::mm::{ArchMemory, ArchPagingMode, GLOBAL_PAGE_TABLE}; +use eonix_hal::mm::{ArchMemory, BasicPageAllocRef, GLOBAL_PAGE_TABLE}; use eonix_hal::traits::mm::Memory; use eonix_mm::address::{Addr as _, AddrOps as _, VAddr, VRange}; -use eonix_mm::page_table::{PageAttribute, PagingMode as _, PTE}; -use eonix_mm::paging::{Page as GenericPage, PAGE_SIZE, PFN}; +use eonix_mm::page_table::{PageAttribute, PageTable, PTE}; +use eonix_mm::paging::{Folio as _, FrameAlloc, PAGE_SIZE, PFN}; use crate::kernel::mem::{GlobalPageAlloc, RawPage}; -pub fn setup_memory(data: &mut BootStrapData) { - let addr_max = ArchMemory::present_ram() - .map(|range| range.end()) - .max() - .expect("No free memory"); - - let pfn_max = PFN::from(addr_max.ceil()); - let len_bytes_page_array = usize::from(pfn_max) * size_of::(); - let count_pages = len_bytes_page_array.div_ceil(PAGE_SIZE); - - let alloc = data.get_alloc().unwrap(); +fn setup_kernel_page_array(alloc: BasicPageAllocRef, count_pages: usize) { + // TODO: This should be done by the global Zone + let global_page_table = PageTable::::new( + GLOBAL_PAGE_TABLE.clone(), + alloc.clone(), + PageAccessImpl, + ); // Map kernel page array. const V_KERNEL_PAGE_ARRAY_START: VAddr = VAddr::from(0xffffff8040000000); - for pte in GLOBAL_PAGE_TABLE.iter_kernel_in( - VRange::from(V_KERNEL_PAGE_ARRAY_START).grow(PAGE_SIZE * count_pages), - ArchPagingMode::LEVELS, - &alloc, - ) { + let range = VRange::from(V_KERNEL_PAGE_ARRAY_START).grow(PAGE_SIZE * count_pages); + for pte in global_page_table.iter_kernel(range) { let attr = PageAttribute::PRESENT | PageAttribute::WRITE | PageAttribute::READ @@ -34,10 +28,15 @@ pub fn setup_memory(data: &mut BootStrapData) { | PageAttribute::ACCESSED | PageAttribute::DIRTY; - let page = GenericPage::alloc_in(&alloc); + let page = alloc.alloc().unwrap(); pte.set(page.into_raw(), attr.into()); } + // TODO!!!: Construct the global zone with all present ram. + // for range in ArchMemory::present_ram() { + // GlobalPageAlloc::mark_present(range); + // } + unsafe { // SAFETY: We've just mapped the area with sufficient length. core::ptr::write_bytes( @@ -47,10 +46,21 @@ pub fn setup_memory(data: &mut BootStrapData) { ); } - // TODO!!!: Construct the global zone with all present ram. 
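For scale, the page-array sizing computed in `setup_memory` (just below) can be made concrete. All numbers here are assumptions for illustration, since `size_of::<RawPage>()` is not pinned down by this patch:

    // Worked example: 4 GiB of present RAM, hypothetical 64-byte RawPage.
    const GIB: usize = 1 << 30;
    const PAGE_SIZE: usize = 4096;
    const RAW_PAGE_SIZE: usize = 64; // assumption, not from this tree
    const PFN_MAX: usize = 4 * GIB / PAGE_SIZE; // 1_048_576 page frames
    const LEN_BYTES_PAGE_ARRAY: usize = PFN_MAX * RAW_PAGE_SIZE; // 64 MiB
    // div_ceil: the array itself then needs 16_384 backing pages.
    const COUNT_PAGES: usize = (LEN_BYTES_PAGE_ARRAY + PAGE_SIZE - 1) / PAGE_SIZE;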
- // for range in ArchMemory::present_ram() { - // GlobalPageAlloc::mark_present(range); - // } + core::mem::forget(global_page_table); +} + +pub fn setup_memory(data: &mut BootStrapData) { + let addr_max = ArchMemory::present_ram() + .map(|range| range.end()) + .max() + .expect("No free memory"); + + let pfn_max = PFN::from(addr_max.ceil()); + let len_bytes_page_array = usize::from(pfn_max) * size_of::(); + let count_pages = len_bytes_page_array.div_ceil(PAGE_SIZE); + + let alloc = data.get_alloc().unwrap(); + setup_kernel_page_array(alloc, count_pages); if let Some(early_alloc) = data.take_alloc() { for range in early_alloc.into_iter() { From cfc959e53dabfcde855cea830610ff0a783c82d5 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sun, 18 Jan 2026 01:09:33 +0800 Subject: [PATCH 11/25] script: add a script to help translate stacktraces - Add script/backtrace to translate backtraces. - Add a cut sign in the kernel panic routine to indicate the start of stack backtrace. Signed-off-by: greatbridf --- script/backtrace | 100 +++++++++++++++++++++++++++++++++++++++++++++++ src/panic.rs | 6 +++ 2 files changed, 106 insertions(+) create mode 100755 script/backtrace diff --git a/script/backtrace b/script/backtrace new file mode 100755 index 00000000..8a60c829 --- /dev/null +++ b/script/backtrace @@ -0,0 +1,100 @@ +#!/bin/bash + +ADDR2LINE=${ADDR2LINE:-riscv64-unknown-elf-addr2line} + +ksym=build/riscv64gc-unknown-none-elf/debug/eonix_kernel + +usage() { + cat < Use the given kernel symbol file + -o, --only-gbos Show kernel function calls only + -h, --help Show this message +EOF + exit "$1" +} + +# $1: instruction address +parse_pos() { + addr="$1" + shift + + "$ADDR2LINE" -e "$ksym" -i "$addr" "$@" 2>/dev/null +} + +filter_col() { + [ "$1" -eq 0 ] || awk "{ print \$$1; }" +} + +str_contains() { + grep -E "$1" >/dev/null 2>&1 +} + +filter_stacktrace() { + NL=$'\n' + _state=nonstart + _out= + while [ $_state != "end" ]; do + read -r _line + case $_state in + nonstart) + str_contains "8< CUT HERE" <<< "$_line" && _state=save + ;; + save) + if str_contains "8< CUT HERE" <<< "$_line"; then + _state=end + else + _out="$_out$_line$NL" + fi + ;; + esac + done + + echo "$_out" +} + +while [ "$#" -gt 0 ]; do + case "$1" in + -s|--ksym) + shift + ksym="$1" + ;; + -o|--only-gbos) + only_gb=y + ;; + --) + shift + break + ;; + -h|--help) + usage 0 + ;; + *) + usage 1 + ;; + esac + shift +done + +stacktrace="$(filter_stacktrace)" + +i=1 +for addr in $(filter_col 3 <<< "$stacktrace"); do + pos="$(parse_pos "$addr" "$@")" + + if [ -n "$only_gb" ]; then + if ! str_contains "greatbridf_os" <<< "$pos"; then + continue + fi + fi + + printf "========== %4d ==========\n" "$i" + + parse_pos "$addr" "$@" + + i=$((i + 1)) +done diff --git a/src/panic.rs b/src/panic.rs index 3c9c5f34..4a9ef92c 100644 --- a/src/panic.rs +++ b/src/panic.rs @@ -24,6 +24,12 @@ pub fn stack_trace() { UnwindReasonCode::NO_REASON } + println_fatal!("--------------8< CUT HERE 8<--------------"); + println_fatal!("Stacktrace:"); + println_fatal!(); + let mut data = CallbackData { counter: 0 }; _Unwind_Backtrace(callback, &raw mut data as *mut c_void); + + println_fatal!("--------------8< CUT HERE 8<--------------"); } From b0c8ef4ccc367cbbcb83ca9901e19ffad20e2758 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sun, 18 Jan 2026 02:35:05 +0800 Subject: [PATCH 12/25] style: fix or suppress warnings No functional changes - Add `{extern_,}symbol_addr` macro to retrieve symbol address. 
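A typical invocation of the script/backtrace helper added above, assuming a panic log captured from the serial console (the exact log line layout is an assumption; the script takes the backtrace address from the third whitespace-separated column between the two "8< CUT HERE" marks):

    # Translate a captured panic log; -o keeps only greatbridf_os frames.
    # The default symbol file is
    # build/riscv64gc-unknown-none-elf/debug/eonix_kernel.
    script/backtrace -o < panic.log

    # Override the symbol file and pass extra options through to addr2line:
    script/backtrace -s path/to/eonix_kernel -- -f -C < panic.log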
- Remove manual impl Send and Sync for RawPage - Make elided '_ lifetimes in return types explicit - Suppress unused warnings by allowing them - Remove really unused functions - Refactor `println_trace` macro to suppress unused variable warnings Signed-off-by: greatbridf --- crates/eonix_hal/src/lib.rs | 26 +++++++++ crates/eonix_log/src/lib.rs | 22 ++++++-- crates/eonix_percpu/src/lib.rs | 31 +++++------ crates/eonix_runtime/src/scheduler.rs | 35 +++++------- crates/eonix_sync/eonix_spin/src/lib.rs | 14 +++-- .../eonix_sync/eonix_sync_rt/src/spin_irq.rs | 4 +- crates/posix_types/src/poll.rs | 2 + macros/src/lib.rs | 4 +- src/driver/ahci/slot.rs | 4 +- src/driver/e1000e.rs | 2 + src/driver/serial/io.rs | 15 ++++-- src/fs/procfs.rs | 2 + src/fs/tmpfs/file.rs | 6 +-- src/kernel/constants.rs | 2 +- src/kernel/mem/folio.rs | 2 +- src/kernel/mem/mm_list/page_fault.rs | 19 +++---- src/kernel/mem/page_alloc/raw_page.rs | 4 -- src/kernel/pcie/device.rs | 24 +++++---- src/kernel/pcie/header.rs | 17 +++--- src/kernel/pcie/init.rs | 5 +- src/kernel/syscall.rs | 23 ++++---- src/kernel/task.rs | 3 +- src/kernel/task/process.rs | 54 +++++++++---------- src/kernel/vfs/dentry.rs | 6 ++- src/kernel/vfs/types/device_id.rs | 11 ---- src/lib.rs | 5 +- src/path.rs | 6 ++- src/sync/arcswap.rs | 11 ++-- 28 files changed, 188 insertions(+), 171 deletions(-) diff --git a/crates/eonix_hal/src/lib.rs b/crates/eonix_hal/src/lib.rs index b9c7d053..d3bf7825 100644 --- a/crates/eonix_hal/src/lib.rs +++ b/crates/eonix_hal/src/lib.rs @@ -43,3 +43,29 @@ pub mod arch_exported { pub use eonix_hal_macros::{ap_main, default_trap_handler, main}; pub use eonix_hal_traits as traits; + +#[macro_export] +macro_rules! symbol_addr { + ($sym:expr) => {{ + ($sym) as *const () as usize + }}; + ($sym:expr, $type:ty) => {{ + ($sym) as *const () as *const $type + }}; +} + +#[macro_export] +macro_rules! extern_symbol_addr { + ($sym:ident) => {{ + unsafe extern "C" { + fn $sym(); + } + $crate::symbol_addr!($sym) + }}; + ($sym:ident, $type:ty) => {{ + unsafe extern "C" { + fn $sym(); + } + $crate::symbol_addr!($sym, $type) + }}; +} diff --git a/crates/eonix_log/src/lib.rs b/crates/eonix_log/src/lib.rs index 92b1639f..01b6a587 100644 --- a/crates/eonix_log/src/lib.rs +++ b/crates/eonix_log/src/lib.rs @@ -2,6 +2,7 @@ use alloc::sync::Arc; use core::fmt::{self, Write}; + use eonix_sync::{Spin, SpinIrq as _}; extern crate alloc; @@ -91,18 +92,31 @@ macro_rules! println_fatal { #[macro_export] macro_rules! 
println_trace {
-    ($feat:literal) => {
+    (feat:$feat:literal) => {
         #[deny(unexpected_cfgs)]
         {
             #[cfg(feature = $feat)]
-            $crate::println!("[kernel:trace] ")
+            $crate::println!("[kernel:trace]")
         }
     };
-    ($feat:literal, $($arg:tt)*) => {{
+    (feat:$feat:literal, $fmt:literal) => {{
         #[deny(unexpected_cfgs)]
         {
             #[cfg(feature = $feat)]
-            $crate::println!("[kernel:trace] {}", format_args!($($arg)*))
+            $crate::println!(concat!("[kernel:trace] ", $fmt))
         }
     }};
+    (feat:$feat:literal, $fmt:literal, $($arg:expr $(,)?)*) => {
+        #[deny(unexpected_cfgs)]
+        {
+            // Suppress unused variables warning
+            #[cfg(not(feature = $feat))]
+            {
+                $(let _ = $arg;)*
+            }
+
+            #[cfg(feature = $feat)]
+            $crate::println!("[kernel:trace] {}", format_args!($fmt, $($arg,)*))
+        }
+    };
 }
diff --git a/crates/eonix_percpu/src/lib.rs b/crates/eonix_percpu/src/lib.rs
index 1fc7ffb8..a00b5c05 100644
--- a/crates/eonix_percpu/src/lib.rs
+++ b/crates/eonix_percpu/src/lib.rs
@@ -1,28 +1,21 @@
 #![no_std]

 use core::alloc::Layout;
-use core::ptr::null_mut;
-use core::ptr::NonNull;
-use core::sync::atomic::AtomicPtr;
-use core::sync::atomic::Ordering;
-
-#[cfg(target_arch = "x86_64")]
-pub use eonix_percpu_macros::define_percpu_x86_64 as define_percpu;
-
-#[cfg(target_arch = "x86_64")]
-pub use eonix_percpu_macros::define_percpu_shared_x86_64 as define_percpu_shared;
-
-#[cfg(target_arch = "riscv64")]
-pub use eonix_percpu_macros::define_percpu_riscv64 as define_percpu;
-
-#[cfg(target_arch = "riscv64")]
-pub use eonix_percpu_macros::define_percpu_shared_riscv64 as define_percpu_shared;
+use core::ptr::{null_mut, NonNull};
+use core::sync::atomic::{AtomicPtr, Ordering};

 #[cfg(target_arch = "loongarch64")]
 pub use eonix_percpu_macros::define_percpu_loongarch64 as define_percpu;
-
+#[cfg(target_arch = "riscv64")]
+pub use eonix_percpu_macros::define_percpu_riscv64 as define_percpu;
 #[cfg(target_arch = "loongarch64")]
 pub use eonix_percpu_macros::define_percpu_shared_loongarch64 as define_percpu_shared;
+#[cfg(target_arch = "riscv64")]
+pub use eonix_percpu_macros::define_percpu_shared_riscv64 as define_percpu_shared;
+#[cfg(target_arch = "x86_64")]
+pub use eonix_percpu_macros::define_percpu_shared_x86_64 as define_percpu_shared;
+#[cfg(target_arch = "x86_64")]
+pub use eonix_percpu_macros::define_percpu_x86_64 as define_percpu;

 const MAX_CPUS: usize = 256;

@@ -41,7 +34,7 @@ impl PercpuArea {
         unsafe extern "C" {
             fn PERCPU_LENGTH();
         }
-        let len = PERCPU_LENGTH as usize;
+        let len = PERCPU_LENGTH as *const () as usize;

         assert_ne!(len, 0, "Percpu length should not be zero.");
         len
@@ -52,7 +45,7 @@ impl PercpuArea {
             fn PERCPU_DATA_START();
         }

-        let addr = PERCPU_DATA_START as usize;
+        let addr = PERCPU_DATA_START as *const () as usize;
         NonNull::new(addr as *mut _).expect("Percpu data should not be null.")
     }

diff --git a/crates/eonix_runtime/src/scheduler.rs b/crates/eonix_runtime/src/scheduler.rs
index 3f72fbf4..b4b7960d 100644
--- a/crates/eonix_runtime/src/scheduler.rs
+++ b/crates/eonix_runtime/src/scheduler.rs
@@ -1,20 +1,19 @@
-use crate::{
-    executor::OutputHandle,
-    ready_queue::{local_rq, ReadyQueue},
-    task::{Task, TaskAdapter, TaskHandle, TaskState},
-};
-use alloc::{sync::Arc, task::Wake};
-use core::{
-    ops::{Deref, DerefMut},
-    ptr::NonNull,
-    task::{Context, Poll, Waker},
-};
+use alloc::sync::Arc;
+use alloc::task::Wake;
+use core::ops::{Deref, DerefMut};
+use core::ptr::NonNull;
+use core::task::{Context, Poll, Waker};
+
 use eonix_hal::processor::halt;
 use eonix_log::println_trace;
 use eonix_sync::{LazyLock, Spin, SpinIrq as _};
 use
intrusive_collections::RBTree; use pointers::BorrowedArc; +use crate::executor::OutputHandle; +use crate::ready_queue::{local_rq, ReadyQueue}; +use crate::task::{Task, TaskAdapter, TaskHandle, TaskState}; + #[eonix_percpu::define_percpu] static CURRENT_TASK: Option> = None; @@ -93,12 +92,6 @@ impl Runtime { } } - fn current(&self) -> Option> { - CURRENT_TASK - .get() - .map(|ptr| unsafe { BorrowedArc::from_raw(ptr) }) - } - fn remove_and_enqueue_current(&self, rq: &mut impl DerefMut) { let Some(current) = CURRENT_TASK .swap(None) @@ -116,7 +109,7 @@ impl Runtime { }) { Ok(TaskState::READY_RUNNING) => { println_trace!( - "trace_scheduler", + feat: "trace_scheduler", "Re-enqueueing task {:?} (CPU{})", current.id, eonix_hal::processor::CPU::local().cpuid(), @@ -126,7 +119,7 @@ impl Runtime { } Ok(_) => { println_trace!( - "trace_scheduler", + feat: "trace_scheduler", "Current task {:?} (CPU{}) is blocked, not re-enqueueing", current.id, eonix_hal::processor::CPU::local().cpuid(), @@ -184,7 +177,7 @@ impl Runtime { }; println_trace!( - "trace_scheduler", + feat: "trace_scheduler", "Switching to task {:?} (CPU{})", next.id, eonix_hal::processor::CPU::local().cpuid(), @@ -212,7 +205,7 @@ impl Runtime { ); println_trace!( - "trace_scheduler", + feat: "trace_scheduler", "Task {:?} finished execution, removing...", Task::current().id, ); diff --git a/crates/eonix_sync/eonix_spin/src/lib.rs b/crates/eonix_sync/eonix_spin/src/lib.rs index 4718b867..7225aceb 100644 --- a/crates/eonix_sync/eonix_spin/src/lib.rs +++ b/crates/eonix_sync/eonix_spin/src/lib.rs @@ -2,13 +2,11 @@ mod guard; -use core::{ - cell::UnsafeCell, - marker::PhantomData, - sync::atomic::{AtomicBool, Ordering}, -}; -use eonix_sync_base::{Relax, SpinRelax}; +use core::cell::UnsafeCell; +use core::marker::PhantomData; +use core::sync::atomic::{AtomicBool, Ordering}; +use eonix_sync_base::{Relax, SpinRelax}; pub use guard::{SpinGuard, UnlockedSpinGuard}; pub trait SpinContext { @@ -84,7 +82,7 @@ where T: ?Sized, R: Relax, { - pub fn lock_with_context(&self, context: C) -> SpinGuard + pub fn lock_with_context(&self, context: C) -> SpinGuard<'_, T, C, R> where C: SpinContext, { @@ -100,7 +98,7 @@ where ) } - pub fn lock(&self) -> SpinGuard { + pub fn lock(&self) -> SpinGuard<'_, T, DisablePreemption, R> { self.lock_with_context(DisablePreemption::save()) } diff --git a/crates/eonix_sync/eonix_sync_rt/src/spin_irq.rs b/crates/eonix_sync/eonix_sync_rt/src/spin_irq.rs index 76a28682..b70cdc3d 100644 --- a/crates/eonix_sync/eonix_sync_rt/src/spin_irq.rs +++ b/crates/eonix_sync/eonix_sync_rt/src/spin_irq.rs @@ -12,7 +12,7 @@ pub trait SpinIrq { type Context: SpinContext; type Relax; - fn lock_irq(&self) -> SpinGuard; + fn lock_irq(&self) -> SpinGuard<'_, Self::Value, Self::Context, Self::Relax>; } impl SpinContext for IrqContext { @@ -50,7 +50,7 @@ where type Context = IrqContext; type Relax = R; - fn lock_irq(&self) -> SpinGuard { + fn lock_irq(&self) -> SpinGuard<'_, Self::Value, Self::Context, Self::Relax> { self.lock_with_context(IrqContext::save()) } } diff --git a/crates/posix_types/src/poll.rs b/crates/posix_types/src/poll.rs index 781f589f..dcf5f9b2 100644 --- a/crates/posix_types/src/poll.rs +++ b/crates/posix_types/src/poll.rs @@ -1,5 +1,7 @@ pub const FDSET_LENGTH: usize = 1024 / (8 * size_of::()); +// TODO: Implement syscall pselect +#[allow(unused)] pub struct FDSet { fds_bits: [usize; FDSET_LENGTH], } diff --git a/macros/src/lib.rs b/macros/src/lib.rs index 722fa5da..09e12f99 100644 --- a/macros/src/lib.rs +++ b/macros/src/lib.rs 
@@ -123,7 +123,7 @@ fn define_syscall_impl(attrs: TokenStream, item: TokenStream) -> TokenStream { Box::new_in( async move { eonix_log::println_trace!( - "trace_syscall", + feat: "trace_syscall", "tid{}: {}({}) => {{", thd.tid, #syscall_name_str, @@ -133,7 +133,7 @@ fn define_syscall_impl(attrs: TokenStream, item: TokenStream) -> TokenStream { let retval = #real_fn(thd, #(#args_call),*).await.into_retval(); eonix_log::println_trace!( - "trace_syscall", + feat: "trace_syscall", "}} => {:x?}", retval, ); diff --git a/src/driver/ahci/slot.rs b/src/driver/ahci/slot.rs index dd096f57..fdb61f96 100644 --- a/src/driver/ahci/slot.rs +++ b/src/driver/ahci/slot.rs @@ -39,6 +39,8 @@ enum SlotState { Idle, Working, Finished, + // TODO: Implement AHCI error handling + #[allow(unused)] Error, } @@ -67,7 +69,7 @@ impl CommandList { + (size_of::>() + size_of::>()) * 32 } - pub fn get(&self, index: usize) -> CommandSlot { + pub fn get(&self, index: usize) -> CommandSlot<'_> { CommandSlot { cmdheader: &self.cmdheaders()[index], control: &self.controls()[index], diff --git a/src/driver/e1000e.rs b/src/driver/e1000e.rs index 6d6ca353..923a4594 100644 --- a/src/driver/e1000e.rs +++ b/src/driver/e1000e.rs @@ -61,6 +61,8 @@ struct E1000eDev { tx_tail: Option, rx_buffers: Box<[FolioOwned; RX_DESC_SIZE]>, + // TODO: Implement E1000e send + #[allow(unused)] tx_buffers: Box<[Option; TX_DESC_SIZE]>, } diff --git a/src/driver/serial/io.rs b/src/driver/serial/io.rs index aec18f20..57e61c56 100644 --- a/src/driver/serial/io.rs +++ b/src/driver/serial/io.rs @@ -1,10 +1,11 @@ -use super::SerialRegister; use core::ptr::NonNull; -use eonix_hal::{fence::memory_barrier, mm::ArchPhysAccess}; -use eonix_mm::address::{PAddr, PhysAccess}; #[cfg(target_arch = "x86_64")] use eonix_hal::arch_exported::io::Port8; +use eonix_hal::mm::ArchPhysAccess; +use eonix_mm::address::{PAddr, PhysAccess}; + +use super::SerialRegister; #[cfg(target_arch = "x86_64")] pub struct SerialIO { @@ -73,10 +74,12 @@ impl SerialIO { self.line_status } + #[allow(unused)] pub fn modem_status(&self) -> impl SerialRegister { self.modem_status } + #[allow(unused)] pub fn scratch(&self) -> impl SerialRegister { self.scratch } @@ -100,7 +103,7 @@ impl SerialRegister for NonNull { let retval = unsafe { self.as_ptr().read_volatile() }; #[cfg(target_arch = "loongarch64")] - memory_barrier(); + eonix_hal::fence::memory_barrier(); retval } @@ -110,7 +113,7 @@ impl SerialRegister for NonNull { unsafe { self.as_ptr().write_volatile(data) }; #[cfg(target_arch = "loongarch64")] - memory_barrier(); + eonix_hal::fence::memory_barrier(); } } @@ -155,10 +158,12 @@ impl SerialIO { unsafe { self.base_addr.add(5) } } + #[allow(unused)] pub fn modem_status(&self) -> impl SerialRegister { unsafe { self.base_addr.add(6) } } + #[allow(unused)] pub fn scratch(&self) -> impl SerialRegister { unsafe { self.base_addr.add(7) } } diff --git a/src/fs/procfs.rs b/src/fs/procfs.rs index 32ede420..9a3933bb 100644 --- a/src/fs/procfs.rs +++ b/src/fs/procfs.rs @@ -26,6 +26,8 @@ enum NodeKind { struct FileInode { read: Option KResult<()> + Send + Sync>>, + // TODO: Implement writes to procfs files + #[allow(unused)] write: Option<()>, } diff --git a/src/fs/tmpfs/file.rs b/src/fs/tmpfs/file.rs index d560a672..aafae539 100644 --- a/src/fs/tmpfs/file.rs +++ b/src/fs/tmpfs/file.rs @@ -177,7 +177,6 @@ impl InodeOps for FileInode { } pub struct DeviceInode { - is_block: bool, devid: DeviceId, } @@ -199,10 +198,7 @@ impl DeviceInode { ctime: now, mtime: now, }, - Self { - is_block: mode.format() == 
Format::BLK, - devid, - }, + Self { devid }, ) } } diff --git a/src/kernel/constants.rs b/src/kernel/constants.rs index 4e11d66e..b96387b0 100644 --- a/src/kernel/constants.rs +++ b/src/kernel/constants.rs @@ -36,7 +36,7 @@ pub const ENOTDIR: u32 = 20; pub const EISDIR: u32 = 21; pub const EINVAL: u32 = 22; pub const ENOTTY: u32 = 25; -pub const ENOSPC: u32 = 28; +// pub const ENOSPC: u32 = 28; pub const ESPIPE: u32 = 29; // pub const EROFS: u32 = 30; pub const EPIPE: u32 = 32; diff --git a/src/kernel/mem/folio.rs b/src/kernel/mem/folio.rs index 6647e1af..8ab4d6be 100644 --- a/src/kernel/mem/folio.rs +++ b/src/kernel/mem/folio.rs @@ -79,7 +79,7 @@ impl Folio { } } - pub fn lock(&self) -> LockedFolio { + pub fn lock(&self) -> LockedFolio<'_> { // TODO: actually perform the lock... LockedFolio(self) } diff --git a/src/kernel/mem/mm_list/page_fault.rs b/src/kernel/mem/mm_list/page_fault.rs index 7aac141d..5a56efbc 100644 --- a/src/kernel/mem/mm_list/page_fault.rs +++ b/src/kernel/mem/mm_list/page_fault.rs @@ -1,3 +1,4 @@ +use eonix_hal::extern_symbol_addr; use eonix_hal::mm::flush_tlb; use eonix_hal::traits::fault::PageFaultErrorCode; use eonix_mm::address::{Addr as _, AddrOps as _, VRange}; @@ -24,27 +25,19 @@ impl FixEntry { VAddr::from((self.start + self.length) as usize) } - #[allow(dead_code)] - fn range(&self) -> VRange { - VRange::new(self.start(), self.end()) - } - fn jump_address(&self) -> VAddr { VAddr::from(self.jump_address as usize) } fn entries() -> &'static [FixEntry] { - extern "C" { - fn FIX_START(); - fn FIX_END(); - } + let fix_seg_len_bytes = extern_symbol_addr!(FIX_END) - extern_symbol_addr!(FIX_START); unsafe { - // SAFETY: `FIX_START` and `FIX_END` are defined in the - // linker script in `.rodata` section. + // SAFETY: `FIX_START` and `FIX_END` are defined in the linker script + // in `.rodata` section. core::slice::from_raw_parts( - FIX_START as usize as *const FixEntry, - (FIX_END as usize - FIX_START as usize) / size_of::(), + extern_symbol_addr!(FIX_START, FixEntry), + fix_seg_len_bytes / size_of::(), ) } } diff --git a/src/kernel/mem/page_alloc/raw_page.rs b/src/kernel/mem/page_alloc/raw_page.rs index 0d775245..16d57714 100644 --- a/src/kernel/mem/page_alloc/raw_page.rs +++ b/src/kernel/mem/page_alloc/raw_page.rs @@ -48,10 +48,6 @@ pub struct RawPage { shared_data: PageData, } -// XXX: introduce Folio and remove this. 
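For reference, this is how the two address macros from this patch are meant to be used, mirroring the `page_fault.rs` change above (a sketch; `__hypothetical_sym` stands in for a real linker-script symbol):

    use eonix_hal::{extern_symbol_addr, symbol_addr};

    fn symbol_demo() -> usize {
        // For an item already in scope, take its address directly
        // (this mirrors `symbol_addr!(standard_main)` in src/lib.rs):
        let _entry = symbol_addr!(symbol_demo);

        // For a linker-provided symbol, the macro declares the extern fn
        // itself and returns the address as a usize (cf. FIX_START /
        // FIX_END in page_fault.rs above):
        extern_symbol_addr!(__hypothetical_sym)
    }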
-unsafe impl Send for RawPage {} -unsafe impl Sync for RawPage {} - impl PageFlags { pub const LOCKED: u32 = 1 << 1; pub const BUDDY: u32 = 1 << 2; diff --git a/src/kernel/pcie/device.rs b/src/kernel/pcie/device.rs index 085e7b9a..2a8f150d 100644 --- a/src/kernel/pcie/device.rs +++ b/src/kernel/pcie/device.rs @@ -1,14 +1,17 @@ -use super::{ - header::{Bar, Command}, - CommonHeader, Header, -}; -use crate::kernel::mem::PhysAccess as _; +use alloc::collections::btree_map::BTreeMap; +use alloc::sync::Arc; +use alloc::vec::Vec; +use core::num::NonZero; +use core::ops::RangeInclusive; + use align_ext::AlignExt; -use alloc::{collections::btree_map::BTreeMap, sync::Arc, vec::Vec}; -use core::{num::NonZero, ops::RangeInclusive}; use eonix_mm::address::{Addr, PAddr, PRange}; use eonix_sync::Spin; +use super::header::{Bar, Command}; +use super::{CommonHeader, Header}; +use crate::kernel::mem::PhysAccess as _; + pub(super) static PCIE_DEVICES: Spin>>> = Spin::new(BTreeMap::new()); @@ -20,7 +23,7 @@ pub struct PCIDevice<'a> { pub device_id: u16, } -#[allow(dead_code)] +#[allow(unused)] #[derive(Clone)] pub struct SegmentGroup { id: usize, @@ -28,6 +31,7 @@ pub struct SegmentGroup { base_address: PAddr, } +#[allow(unused)] #[derive(Clone)] pub struct ConfigSpace { pub bus: u8, @@ -180,10 +184,12 @@ impl PCIDevice<'_> { ); } + #[allow(unused)] pub fn config_space(&self) -> &ConfigSpace { &self.config_space } + #[allow(unused)] pub fn segment_group(&self) -> &SegmentGroup { &self.segment_group } @@ -209,7 +215,7 @@ impl PciMemoryAllocator { self.start += size; eonix_log::println_trace!( - "trace_pci", + feat: "trace_pci", "PciMemoryAllocator: Allocated {} bytes at {:#x}", size, base diff --git a/src/kernel/pcie/header.rs b/src/kernel/pcie/header.rs index 889795d3..0a44ea28 100644 --- a/src/kernel/pcie/header.rs +++ b/src/kernel/pcie/header.rs @@ -1,10 +1,9 @@ +use core::marker::PhantomData; +use core::num::NonZero; +use core::ops::{BitAnd, BitOr, Deref, Not}; +use core::sync::atomic::{AtomicU16, AtomicU32, Ordering}; + use bitflags::bitflags; -use core::{ - marker::PhantomData, - num::NonZero, - ops::{BitAnd, BitOr, Deref, Not}, - sync::atomic::{AtomicU16, AtomicU32, Ordering}, -}; use eonix_hal::fence::memory_barrier; pub trait BitFlag: Sized + Copy { @@ -215,14 +214,14 @@ where } impl CommonHeader { - pub fn command(&self) -> Register { + pub fn command(&self) -> Register<'_, Command> { Register { register: unsafe { AtomicU16::from_ptr((&raw const self._command) as *mut u16) }, _phantom: PhantomData, } } - pub fn status(&self) -> Register { + pub fn status(&self) -> Register<'_, Status> { Register { register: unsafe { AtomicU16::from_ptr((&raw const self._status) as *mut u16) }, _phantom: PhantomData, @@ -231,7 +230,7 @@ impl CommonHeader { } impl Bars<'_> { - pub fn iter(&self) -> impl Iterator + '_ { + pub fn iter(&self) -> impl Iterator> + use<'_> { struct BarsIterator<'a> { bars: &'a [AtomicU32], pos: usize, diff --git a/src/kernel/pcie/init.rs b/src/kernel/pcie/init.rs index 4c183bc5..74a490b4 100644 --- a/src/kernel/pcie/init.rs +++ b/src/kernel/pcie/init.rs @@ -10,6 +10,7 @@ use super::error::PciError; use crate::kernel::mem::PhysAccess as _; use crate::kernel::pcie::device::PciMemoryAllocator; +#[allow(unused)] #[derive(Clone)] struct AcpiHandlerImpl; @@ -34,7 +35,6 @@ pub fn init_pcie() -> Result<(), PciError> { #[cfg(target_arch = "x86_64")] { use acpi::{AcpiTables, PciConfigRegions}; - use eonix_mm::address::PAddr; let acpi_tables = unsafe { // SAFETY: Our impl should be correct. 
@@ -69,7 +69,6 @@ pub fn init_pcie() -> Result<(), PciError> { #[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))] { use eonix_hal::arch_exported::fdt::FDT; - use eonix_mm::address::PRange; use crate::kernel::constants::{EINVAL, EIO, ENOENT}; @@ -88,7 +87,7 @@ pub fn init_pcie() -> Result<(), PciError> { let size = u64::from_be_bytes(entry[20..28].try_into().unwrap()); println_trace!( - "trace_pci", + feat: "trace_pci", "PCIe range: PCI address = {:#x}, CPU address = {:#x}, size = {:#x}", pci_address, cpu_address, diff --git a/src/kernel/syscall.rs b/src/kernel/syscall.rs index 78ddcd1c..d06c5d88 100644 --- a/src/kernel/syscall.rs +++ b/src/kernel/syscall.rs @@ -1,11 +1,17 @@ -use super::task::ThreadAlloc; -use crate::kernel::task::Thread; use alloc::boxed::Box; -use core::{future::Future, marker::PhantomData, ops::Deref, pin::Pin}; +use core::future::Future; +use core::marker::PhantomData; +use core::ops::Deref; +use core::pin::Pin; + +use eonix_hal::extern_symbol_addr; use eonix_mm::address::{Addr, VAddr}; use eonix_sync::LazyLock; use posix_types::ctypes::PtrT; +use super::task::ThreadAlloc; +use crate::kernel::task::Thread; + pub mod file_rw; pub mod mm; pub mod net; @@ -280,12 +286,6 @@ impl core::fmt::Debug for UserMut { } static SYSCALL_HANDLERS: LazyLock<[Option; MAX_SYSCALL_NO]> = LazyLock::new(|| { - extern "C" { - // SAFETY: `SYSCALL_HANDLERS` is defined in linker script. - fn RAW_SYSCALL_HANDLERS(); - fn RAW_SYSCALL_HANDLERS_SIZE(); - } - // DO NOT TOUCH THESE FUNCTIONS!!! // THEY ARE USED FOR KEEPING THE OBJECTS NOT STRIPPED BY THE LINKER!!! file_rw::keep_alive(); @@ -294,15 +294,14 @@ static SYSCALL_HANDLERS: LazyLock<[Option; MAX_SYSCALL_NO]> = La procops::keep_alive(); sysinfo::keep_alive(); - let raw_handlers_addr = RAW_SYSCALL_HANDLERS as *const (); - let raw_handlers_size_byte = RAW_SYSCALL_HANDLERS_SIZE as usize; + let raw_handlers_size_byte = extern_symbol_addr!(RAW_SYSCALL_HANDLERS_SIZE); assert!(raw_handlers_size_byte % size_of::() == 0); let raw_handlers_count = raw_handlers_size_byte / size_of::(); let raw_handlers = unsafe { core::slice::from_raw_parts( - raw_handlers_addr as *const RawSyscallHandler, + extern_symbol_addr!(RAW_SYSCALL_HANDLERS, RawSyscallHandler), raw_handlers_count, ) }; diff --git a/src/kernel/task.rs b/src/kernel/task.rs index 3fe6fe97..b0966046 100644 --- a/src/kernel/task.rs +++ b/src/kernel/task.rs @@ -11,6 +11,7 @@ mod thread; mod user_tls; pub use clone::{do_clone, CloneArgs, CloneFlags}; +use eonix_hal::symbol_addr; pub use futex::{futex_wait, futex_wake, parse_futexop, FutexFlags, FutexOp, RobustListHead}; pub use kernel_stack::KernelStack; pub use loader::ProgramLoader; @@ -185,7 +186,7 @@ where trap_ctx.set_user_mode(false); trap_ctx.set_interrupt_enabled(true); let _ = trap_ctx.set_user_call_frame( - execute:: as usize, + symbol_addr!(execute::), Some(sp.addr().get()), None, &[(&raw mut future) as usize, output.get() as usize], diff --git a/src/kernel/task/process.rs b/src/kernel/task/process.rs index 421e4b8b..3eff5949 100644 --- a/src/kernel/task/process.rs +++ b/src/kernel/task/process.rs @@ -1,34 +1,30 @@ -use super::{ - process_group::ProcessGroupBuilder, signal::RaiseResult, thread::ThreadBuilder, ProcessGroup, - ProcessList, Session, Thread, -}; -use crate::kernel::constants::{ECHILD, EINTR, EINVAL, EPERM, ESRCH}; -use crate::kernel::task::{CloneArgs, CloneFlags}; -use crate::rcu::call_rcu; -use crate::{ - kernel::mem::MMList, - prelude::*, - rcu::{RCUPointer, RCUReadGuard}, - sync::CondVar, -}; -use alloc::{ - 
collections::{btree_map::BTreeMap, vec_deque::VecDeque}, - sync::{Arc, Weak}, -}; +use alloc::collections::btree_map::BTreeMap; +use alloc::collections::vec_deque::VecDeque; +use alloc::sync::{Arc, Weak}; use core::sync::atomic::{AtomicU32, Ordering}; -use eonix_mm::address::VAddr; + use eonix_sync::{ AsProof as _, AsProofMut as _, Locked, Proof, ProofMut, RwLockReadGuard, SpinGuard, UnlockableGuard as _, UnlockedGuard as _, }; use pointers::BorrowedArc; use posix_types::constants::{ - CLD_CONTINUED, CLD_DUMPED, CLD_EXITED, CLD_KILLED, CLD_STOPPED, P_PGID, P_PIDFD, + CLD_CONTINUED, CLD_DUMPED, CLD_EXITED, CLD_KILLED, CLD_STOPPED, P_ALL, P_PGID, P_PID, P_PIDFD, }; -use posix_types::constants::{P_ALL, P_PID}; use posix_types::signal::Signal; use posix_types::SIGNAL_COREDUMP; +use super::process_group::ProcessGroupBuilder; +use super::signal::RaiseResult; +use super::thread::ThreadBuilder; +use super::{ProcessGroup, ProcessList, Session, Thread}; +use crate::kernel::constants::{ECHILD, EINTR, EINVAL, EPERM, ESRCH}; +use crate::kernel::mem::MMList; +use crate::kernel::task::{CloneArgs, CloneFlags}; +use crate::prelude::*; +use crate::rcu::{call_rcu, RCUPointer, RCUReadGuard}; +use crate::sync::CondVar; + pub struct ProcessBuilder { mm_list: Option, exit_signal: Option, @@ -51,8 +47,6 @@ pub struct Process { pub exit_signal: Option, - pub shm_areas: Spin>, - /// Parent process /// /// `parent` must be valid during the whole life of the process. @@ -256,7 +250,6 @@ impl ProcessBuilder { pid: self.pid.expect("should set pid before building"), wait_list: WaitList::new(), mm_list, - shm_areas: Spin::new(BTreeMap::new()), exit_signal: self.exit_signal, parent: RCUPointer::empty(), pgroup: RCUPointer::empty(), @@ -522,17 +515,17 @@ impl Process { } /// Provide RCU locked (maybe inconsistent) access to the session. - pub fn session_rcu(&self) -> RCUReadGuard<'_, BorrowedArc> { + pub fn session_rcu(&self) -> RCUReadGuard<'_, BorrowedArc<'_, Session>> { self.session.load().unwrap() } /// Provide RCU locked (maybe inconsistent) access to the process group. - pub fn pgroup_rcu(&self) -> RCUReadGuard<'_, BorrowedArc> { + pub fn pgroup_rcu(&self) -> RCUReadGuard<'_, BorrowedArc<'_, ProcessGroup>> { self.pgroup.load().unwrap() } /// Provide RCU locked (maybe inconsistent) access to the parent process. - pub fn parent_rcu(&self) -> Option>> { + pub fn parent_rcu(&self) -> Option>> { self.parent.load() } @@ -569,7 +562,7 @@ impl WaitList { self.cv_wait_procs.notify_all(); } - pub fn drain_exited(&self) -> DrainExited { + pub fn drain_exited(&self) -> DrainExited<'_> { DrainExited { wait_procs: self.wait_procs.lock(), } @@ -578,7 +571,12 @@ impl WaitList { /// # Safety /// Locks `ProcessList` and `WaitList` at the same time. When `wait` is called, /// releases the lock on `ProcessList` and `WaitList` and waits on `cv_wait_procs`. 
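The lifetime fixes in this patch all follow the same shape; a minimal sketch of the `'_` rule being applied (toy types, not from this tree):

    // Before: `fn lock(&self) -> Guard` hides that the guard borrows
    // `self`; recent toolchains warn about the elided lifetime.
    struct Guard<'a>(&'a u32);
    struct Lock(u32);

    impl Lock {
        // After: `'_` names the borrow in the return type.
        fn lock(&self) -> Guard<'_> {
            Guard(&self.0)
        }
    }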
- pub async fn entry(&self, wait_id: WaitId, want_stop: bool, want_continue: bool) -> Entry { + pub async fn entry( + &self, + wait_id: WaitId, + want_stop: bool, + want_continue: bool, + ) -> Entry<'_, '_, '_> { Entry { process_list: ProcessList::get().read().await, wait_procs: self.wait_procs.lock(), diff --git a/src/kernel/vfs/dentry.rs b/src/kernel/vfs/dentry.rs index 22760de9..a401f4f7 100644 --- a/src/kernel/vfs/dentry.rs +++ b/src/kernel/vfs/dentry.rs @@ -27,6 +27,8 @@ use crate::path::Path; use crate::prelude::*; use crate::rcu::{rcu_read_lock, RCUNode, RCUPointer, RCUReadGuard}; +// TODO: Implement slab reclaim +#[allow(unused)] const D_INVALID: u8 = 0; const D_REGULAR: u8 = 1; const D_DIRECTORY: u8 = 2; @@ -159,7 +161,7 @@ impl Dentry { && &***self.name() == &***other.name() } - pub fn name(&self) -> RCUReadGuard>> { + pub fn name(&self) -> RCUReadGuard<'_, BorrowedArc<'_, Arc<[u8]>>> { self.name.load().expect("Dentry has no name") } @@ -167,7 +169,7 @@ impl Dentry { (***self.name()).clone() } - pub fn parent<'a>(&self) -> RCUReadGuard<'a, BorrowedArc> { + pub fn parent<'a>(&self) -> RCUReadGuard<'a, BorrowedArc<'_, Dentry>> { self.parent.load().expect("Dentry has no parent") } diff --git a/src/kernel/vfs/types/device_id.rs b/src/kernel/vfs/types/device_id.rs index cf3ea886..6dd128ee 100644 --- a/src/kernel/vfs/types/device_id.rs +++ b/src/kernel/vfs/types/device_id.rs @@ -10,17 +10,6 @@ impl DeviceId { pub const fn new(major: u16, minor: u16) -> Self { Self { major, minor } } - - pub const fn from_raw(raw: u32) -> Self { - Self { - major: (raw >> 16) as u16, - minor: (raw & 0xFFFF) as u16, - } - } - - pub const fn to_raw(self) -> u32 { - ((self.major as u32) << 16) | (self.minor as u32) - } } impl Debug for DeviceId { diff --git a/src/lib.rs b/src/lib.rs index 8457169c..4f7fb262 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -38,6 +38,7 @@ use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; use eonix_hal::arch_exported::bootstrap::shutdown; use eonix_hal::context::TaskContext; use eonix_hal::processor::{halt, CPU, CPU_COUNT}; +use eonix_hal::symbol_addr; use eonix_hal::traits::context::RawTaskContext; use eonix_hal::traits::trap::IrqState; use eonix_hal::trap::disable_irqs_save; @@ -136,7 +137,7 @@ fn kernel_init(mut data: eonix_hal::bootstrap::BootStrapData) -> ! { bottom }; ctx.set_interrupt_enabled(true); - ctx.set_program_counter(standard_main as usize); + ctx.set_program_counter(symbol_addr!(standard_main)); ctx.set_stack_pointer(stack_bottom); unsafe { @@ -162,7 +163,7 @@ fn kernel_ap_main(_stack_range: PRange) -> ! 
{ bottom }; ctx.set_interrupt_enabled(true); - ctx.set_program_counter(standard_main as usize); + ctx.set_program_counter(symbol_addr!(standard_main)); ctx.set_stack_pointer(stack_bottom); unsafe { diff --git a/src/path.rs b/src/path.rs index b342ef5f..47b9a4b6 100644 --- a/src/path.rs +++ b/src/path.rs @@ -1,6 +1,8 @@ -use crate::{kernel::constants::ENOENT, prelude::*}; use core::fmt::{self, Debug, Formatter}; +use crate::kernel::constants::ENOENT; +use crate::prelude::*; + #[repr(transparent)] pub struct Path { all: [u8], @@ -23,7 +25,7 @@ impl Path { self.all.starts_with(&['/' as u8]) } - pub fn iter(&self) -> PathIterator { + pub fn iter(&self) -> PathIterator<'_> { PathIterator::new(&self.all) } } diff --git a/src/sync/arcswap.rs b/src/sync/arcswap.rs index fb8219b2..7421659f 100644 --- a/src/sync/arcswap.rs +++ b/src/sync/arcswap.rs @@ -1,9 +1,8 @@ use alloc::sync::Arc; -use core::{ - fmt::{self, Debug, Formatter}, - ptr::NonNull, - sync::atomic::{AtomicPtr, Ordering}, -}; +use core::fmt::{self, Debug, Formatter}; +use core::ptr::NonNull; +use core::sync::atomic::{AtomicPtr, Ordering}; + use pointers::BorrowedArc; unsafe impl Send for ArcSwap where T: Send + Sync {} @@ -33,7 +32,7 @@ impl ArcSwap { } } - pub fn borrow(&self) -> BorrowedArc { + pub fn borrow(&self) -> BorrowedArc<'_, T> { unsafe { BorrowedArc::from_raw( NonNull::new(self.pointer.load(Ordering::Acquire)) From bc84f0be804af2416e0b37a2cbf565f01cd13556 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sun, 18 Jan 2026 02:56:56 +0800 Subject: [PATCH 13/25] user, init: update riscv64 init script Make print message prettier. Also panic when we have an error. Signed-off-by: greatbridf --- user-programs/init_script_riscv64.sh | 88 ++++++++++++++++------------ 1 file changed, 52 insertions(+), 36 deletions(-) diff --git a/user-programs/init_script_riscv64.sh b/user-programs/init_script_riscv64.sh index 52b2628c..b5ce95d7 100644 --- a/user-programs/init_script_riscv64.sh +++ b/user-programs/init_script_riscv64.sh @@ -1,60 +1,75 @@ #!/mnt/busybox sh BUSYBOX=/mnt/busybox +TERMINAL=/dev/ttyS0 +VERBOSE= -freeze() { - echo "an error occurred while executing '''$@''', freezing..." >&2 +error() { + printf "\033[91merror: \033[0m%s\n" "$1" >&2 +} + +warn() { + printf "\033[93mwarn : \033[0m%s\n" "$1" >&2 +} + +info() { + printf "\033[92minfo : \033[0m%s\n" "$1" >&2 +} + +die() { + error "$1" && freeze +} +freeze() { + info "freezing..." >&2 while true; do - true + : done + + exit 1 } -do_or_freeze() { - if $@; then - return - fi +unrecoverable() { + die "unrecoverable error occurred. check the message above." +} - freeze $@ +busybox() { + $BUSYBOX "$@" } -do_or_freeze $BUSYBOX mkdir -p /dev +trap unrecoverable EXIT + +set -euo pipefail -do_or_freeze $BUSYBOX mknod -m 666 /dev/console c 5 1 -do_or_freeze $BUSYBOX mknod -m 666 /dev/null c 1 3 -do_or_freeze $BUSYBOX mknod -m 666 /dev/zero c 1 5 -do_or_freeze $BUSYBOX mknod -m 666 /dev/vda b 8 0 -do_or_freeze $BUSYBOX mknod -m 666 /dev/vda1 b 8 1 -do_or_freeze $BUSYBOX mknod -m 666 /dev/vdb b 8 16 -do_or_freeze $BUSYBOX mknod -m 666 /dev/ttyS0 c 4 64 -do_or_freeze $BUSYBOX mknod -m 666 /dev/ttyS1 c 4 65 +if [ -n "$VERBOSE" ]; then + set -x +fi -echo -n -e "deploying busybox... 
" >&2 +busybox mkdir -p /dev -do_or_freeze $BUSYBOX mkdir -p /bin -do_or_freeze $BUSYBOX --install -s /bin -do_or_freeze $BUSYBOX mkdir -p /lib +busybox mknod -m 666 /dev/console c 5 1 +busybox mknod -m 666 /dev/null c 1 3 +busybox mknod -m 666 /dev/zero c 1 5 +busybox mknod -m 666 /dev/vda b 8 0 +busybox mknod -m 666 /dev/vda1 b 8 1 +busybox mknod -m 666 /dev/vdb b 8 16 +busybox mknod -m 666 /dev/ttyS0 c 4 64 +busybox mknod -m 666 /dev/ttyS1 c 4 65 -export PATH="/bin" +info "deploying busybox..." -echo ok >&2 +busybox mkdir -p /bin /lib +busybox --install -s /bin -do_or_freeze mkdir -p /etc /root /proc -do_or_freeze mount -t procfs proc proc +info "done" -# Check if the device /dev/vdb is available and can be read -if dd if=/dev/vdb of=/dev/null bs=512 count=1; then - echo -n -e "Mounting the ext4 image... " >&2 - do_or_freeze mkdir -p /mnt1 - do_or_freeze mount -t ext4 /dev/vdb /mnt1 - echo ok >&2 -fi +export PATH="/bin" -cp /mnt/ld-musl-i386.so.1 /lib/ld-musl-i386.so.1 -ln -s /lib/ld-musl-i386.so.1 /lib/libc.so +mkdir -p /etc /root /proc +mount -t procfs proc proc cat > /etc/passwd < /etc/group < /dev/ttyS0 2> /dev/ttyS0 +# shellcheck disable=SC2094 +exec sh -l < "$TERMINAL" > "$TERMINAL" 2> "$TERMINAL" # We don't have a working init yet, so we use busybox sh directly for now. # exec /mnt/init /bin/sh -c 'exec sh -l < /dev/ttyS0 > /dev/ttyS0 2> /dev/ttyS0' From 47d890648a7895eb620335d4f233e8d1ad160938 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sun, 18 Jan 2026 14:33:38 +0800 Subject: [PATCH 14/25] chore: remove and ignore vscode settings from svc Signed-off-by: greatbridf --- .gitignore | 1 + .vscode/settings.json | 4 ---- 2 files changed, 1 insertion(+), 4 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.gitignore b/.gitignore index fbc2a9b1..4684b698 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ build/ .idea/ +.vscode/settings.json test/ diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 10b4a8b4..00000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "makefile.configureOnOpen": false, - "editor.formatOnSave": true, -} From b6d54d6a0ff15b96273ddc499670620b2baceefe Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sun, 18 Jan 2026 21:35:39 +0800 Subject: [PATCH 15/25] mm, proc: add an exited thread reaper - thd.exit() will set thd.dead and send it to the reaper - delay the release of process mm until we reap it - extract futex logic out of exit and exec routine Signed-off-by: greatbridf --- src/kernel/mem/mm_list.rs | 30 +++---- src/kernel/syscall/procops.rs | 26 ++---- src/kernel/task.rs | 5 +- src/kernel/task/futex.rs | 53 +++++++++--- src/kernel/task/process_list.rs | 144 ++++++++++++++++++++------------ src/kernel/task/signal.rs | 20 ++--- src/kernel/task/thread.rs | 49 ++++++++--- src/lib.rs | 23 +---- 8 files changed, 209 insertions(+), 141 deletions(-) diff --git a/src/kernel/mem/mm_list.rs b/src/kernel/mem/mm_list.rs index 5221c73b..f073025b 100644 --- a/src/kernel/mem/mm_list.rs +++ b/src/kernel/mem/mm_list.rs @@ -398,20 +398,6 @@ impl MMList { assert_ne!(old_user_count, 0); } - /// Deactivate `self` and activate `to` with root page table changed only once. - /// This might reduce the overhead of switching page tables twice. 
- #[allow(dead_code)] - pub fn switch(&self, to: &Self) { - self.user_count.fetch_add(1, Ordering::Acquire); - - let root_page_table = self.root_page_table.load(Ordering::Relaxed); - assert_ne!(root_page_table, 0); - set_root_page_table_pfn(PFN::from(PAddr::from(root_page_table))); - - let old_user_count = to.user_count.fetch_sub(1, Ordering::Release); - assert_ne!(old_user_count, 0); - } - /// Replace the current page table with a new one. /// /// # Safety @@ -454,10 +440,24 @@ impl MMList { // TODO: Check whether we should wake someone up if they've been put // to sleep when calling `vfork`. - self.inner + let old_mm = self + .inner .swap(new.map(|new_mm| new_mm.inner.swap(None)).flatten()); eonix_preempt::enable(); + + // This could take long... + drop(old_mm); + } + + pub fn release(&self) { + let old_mm = self.inner.swap(None); + let old_table = self.root_page_table.swap(0, Ordering::Relaxed); + + // TODO: Remove this completely... + // XXX: `ArcSwap` is broken and never safe to use. Check `replace` above. + assert_ne!(old_table, 0, "Already released?"); + assert!(old_mm.is_some(), "Already released?"); } /// No need to do invalidation manually, `PageTable` already does it. diff --git a/src/kernel/syscall/procops.rs b/src/kernel/syscall/procops.rs index 1359d0ab..3e815f25 100644 --- a/src/kernel/syscall/procops.rs +++ b/src/kernel/syscall/procops.rs @@ -22,8 +22,8 @@ use crate::kernel::constants::{ use crate::kernel::mem::PageBuffer; use crate::kernel::syscall::{User, UserMut}; use crate::kernel::task::{ - do_clone, futex_wait, futex_wake, parse_futexop, yield_now, CloneArgs, FutexFlags, FutexOp, - ProcessList, ProgramLoader, RobustListHead, SignalAction, Thread, WaitId, WaitType, + do_clone, futex_exec, futex_wait, futex_wake, parse_futexop, yield_now, CloneArgs, FutexFlags, + FutexOp, ProcessList, ProgramLoader, RobustListHead, SignalAction, Thread, WaitId, WaitType, }; use crate::kernel::timer::sleep; use crate::kernel::user::{UserBuffer, UserPointer, UserPointerMut, UserString}; @@ -213,10 +213,7 @@ async fn execve(exec: User, argv: User, envp: User) -> KResult, argv: User, envp: User) -> KResult SyscallNoReturn { - let mut procs = ProcessList::get().write().await; - - unsafe { - procs - .do_exit(&thread, WaitType::Exited(status), false) - .await; - } + thread.exit(WaitType::Exited(status)); SyscallNoReturn } #[eonix_macros::define_syscall(SYS_EXIT_GROUP)] async fn exit_group(status: u32) -> SyscallNoReturn { - let mut procs = ProcessList::get().write().await; - - unsafe { - procs.do_exit(&thread, WaitType::Exited(status), true).await; - } + // XXX: Send SIGKILL to our sibling threads. 
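Taken together, the exit rework in this patch replaces `do_exit` with a three-stage flow (a sketch of the control flow across the functions in this patch, not literal code):

    // 1. A syscall calls thread.exit(status): the first caller records
    //    the exit status and marks the thread dead.
    // 2. Thread::run() sees real_run() return, performs futex_exit()
    //    (ctid clear/wake, robust list), then send_to_reaper(self).
    // 3. Reaper::daemon() dequeues the thread and does the old do_exit()
    //    work: close files, release the mm, reparent children to init,
    //    and notify the parent with the recorded WaitType.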
+        thread.exit(WaitType::Exited(status));
     SyscallNoReturn
 }

@@ -856,7 +844,7 @@ async fn rt_sigreturn() -> KResult {
             "`rt_sigreturn` failed in thread {} with error {err}!",
             thread.tid
         );
-        thread.force_kill(Signal::SIGSEGV).await;
+        thread.force_kill(Signal::SIGSEGV);
         return Err(err);
     }

diff --git a/src/kernel/task.rs b/src/kernel/task.rs
index b0966046..6505666c 100644
--- a/src/kernel/task.rs
+++ b/src/kernel/task.rs
@@ -12,7 +12,10 @@ mod user_tls;
 pub use clone::{do_clone, CloneArgs, CloneFlags};
 use eonix_hal::symbol_addr;
-pub use futex::{futex_wait, futex_wake, parse_futexop, FutexFlags, FutexOp, RobustListHead};
+pub use futex::{
+    futex_exec, futex_exit, futex_wait, futex_wake, parse_futexop, FutexFlags, FutexOp,
+    RobustListHead,
+};
 pub use kernel_stack::KernelStack;
 pub use loader::ProgramLoader;
 pub use process::{alloc_pid, Process, ProcessBuilder, WaitId, WaitObject, WaitType};
diff --git a/src/kernel/task/futex.rs b/src/kernel/task/futex.rs
index a04d7091..4dd57615 100644
--- a/src/kernel/task/futex.rs
+++ b/src/kernel/task/futex.rs
@@ -1,19 +1,17 @@
-use core::pin::pin;
-
 use alloc::sync::Arc;
 use alloc::vec::Vec;
+use core::pin::pin;
+
 use bitflags::bitflags;
+use eonix_mm::address::Addr;
 use eonix_sync::{LazyLock, Mutex, MutexGuard, WaitList};
 use intrusive_collections::{intrusive_adapter, KeyAdapter, RBTree, RBTreeAtomicLink};
-use crate::{
-    kernel::{
-        constants::{EAGAIN, EINVAL},
-        syscall::User,
-        user::UserPointer,
-    },
-    prelude::KResult,
-};
+use super::Thread;
+use crate::kernel::constants::{EAGAIN, EINVAL};
+use crate::kernel::syscall::User;
+use crate::kernel::user::{UserPointer, UserPointerMut};
+use crate::prelude::KResult;

 #[derive(PartialEq, Debug, Clone, Copy)]
 #[repr(u32)]
@@ -318,3 +316,38 @@ impl RobustListHead {
         Ok(())
     }
 }
+
+async fn do_futex_exit(thread: &Thread) -> KResult<()> {
+    if let Some(clear_ctid) = thread.get_clear_ctid() {
+        UserPointerMut::new(clear_ctid)?.write(0u32)?;
+
+        futex_wake(clear_ctid.addr(), None, 1).await?;
+    }
+
+    if let Some(robust_list) = thread.get_robust_list() {
+        robust_list.wake_all().await?;
+    }
+
+    Ok(())
+}
+
+pub async fn futex_exit(thread: &Thread) {
+    // We don't care about any error that happens inside.
+    // If they've set up a wrong pointer, good luck to them...
+    let _ = do_futex_exit(thread).await;
+}
+
+async fn do_futex_exec(thread: &Thread) -> KResult<()> {
+    if let Some(robust_list) = thread.get_robust_list() {
+        robust_list.wake_all().await?;
+        thread.set_robust_list(None);
+    }
+
+    Ok(())
+}
+
+pub async fn futex_exec(thread: &Thread) {
+    // We don't care about any error that happens inside.
+    // If they've set up a wrong pointer, good luck to them...
+    let _ = do_futex_exec(thread).await;
+}
diff --git a/src/kernel/task/process_list.rs b/src/kernel/task/process_list.rs
index af073e84..c676d22e 100644
--- a/src/kernel/task/process_list.rs
+++ b/src/kernel/task/process_list.rs
@@ -1,16 +1,17 @@
+use alloc::collections::btree_map::BTreeMap;
+use alloc::collections::vec_deque::VecDeque;
+use alloc::sync::{Arc, Weak};
+use core::pin::pin;
 use core::sync::atomic::Ordering;

-use super::{Process, ProcessGroup, Session, Thread, WaitObject, WaitType};
-use crate::{
-    kernel::{task::futex_wake, user::UserPointerMut},
-    rcu::rcu_sync,
-};
-use alloc::{
-    collections::btree_map::BTreeMap,
-    sync::{Arc, Weak},
-};
-use eonix_mm::address::Addr;
-use eonix_sync::{AsProof as _, AsProofMut as _, RwLock};
+use eonix_runtime::scheduler::RUNTIME;
+use eonix_sync::{AsProof as _, AsProofMut as _, RwLock, Spin, WaitList};
+
+use super::loader::LoadInfo;
+use super::{
+    alloc_pid, Process, ProcessBuilder, ProcessGroup, Session, Thread, ThreadBuilder, WaitObject,
+};
+use crate::rcu::rcu_sync;

 pub struct ProcessList {
     /// The init process.
@@ -78,7 +79,7 @@ impl ProcessList {
         }
     }

-    pub fn set_init_process(&mut self, init: Arc<Process>) {
+    fn set_init_process(&mut self, init: Arc<Process>) {
         let old_init = self.init.replace(init);
         assert!(old_init.is_none(), "Init process already set");
     }
@@ -103,45 +104,66 @@ impl ProcessList {
         self.sessions.get(&sid).and_then(Weak::upgrade)
     }

-    /// Make the process a zombie and notify the parent.
-    /// # Safety
-    /// This function will destroy the process and all its threads.
-    /// It is the caller's responsibility to ensure that the process is not
-    /// running or will not run after this function is called.
-    pub async unsafe fn do_exit(
-        &mut self,
-        thread: &Thread,
-        exit_status: WaitType,
-        is_exiting_group: bool,
-    ) {
-        let process = thread.process.clone();
-
-        if process.pid == 1 {
-            panic!("init exited");
-        }
+    pub async fn sys_init(load_info: LoadInfo) {
+        let thread_builder = ThreadBuilder::new()
+            .name(Arc::from(&b"busybox"[..]))
+            .entry(load_info.entry_ip, load_info.sp);

-        let inner = process.inner.access_mut(self.prove_mut());
+        let mut process_list = ProcessList::get().write().await;
+        let (thread, process) = ProcessBuilder::new()
+            .pid(alloc_pid())
+            .mm_list(load_info.mm_list)
+            .thread_builder(thread_builder)
+            .build(&mut process_list);

-        thread.dead.store(true, Ordering::SeqCst);
+        process_list.set_init_process(process);

-        if is_exiting_group {
-            // TODO: Send SIGKILL to all threads.
-            // todo!()
-        }
+        // TODO!!!: Remove this.
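On the user side, the ctid clear-and-wake performed by `do_futex_exit` above is what makes a thread join return. An illustrative counterpart (standard Linux CLONE_CHILD_CLEARTID semantics, not code from this tree; the `libc` crate is assumed):

    use core::sync::atomic::{AtomicU32, Ordering};

    // `ctid` is the word registered with the kernel at clone() time.
    fn join_thread(ctid: &AtomicU32) {
        loop {
            let tid = ctid.load(Ordering::Acquire);
            if tid == 0 {
                return; // the kernel cleared it in do_futex_exit()
            }
            // Sleep until the exiting thread's kernel wakes this word.
            unsafe {
                libc::syscall(
                    libc::SYS_futex,
                    ctid.as_ptr(),
                    libc::FUTEX_WAIT,
                    tid,
                    core::ptr::null::<libc::timespec>(),
                );
            }
        }
    }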
+ thread.files.open_console(); - if thread.tid != process.pid { - self.threads.remove(&thread.tid); - inner.threads.remove(&thread.tid).unwrap(); - } + RUNTIME.spawn(Reaper::daemon()); + RUNTIME.spawn(thread.run()); + } + + pub fn send_to_reaper(thread: Arc) { + GLOBAL_REAPER.reap_list.lock().push_back(thread); + GLOBAL_REAPER.wait.notify_one(); + } +} + +struct Reaper { + reap_list: Spin>>, + wait: WaitList, +} + +static GLOBAL_REAPER: Reaper = Reaper { + reap_list: Spin::new(VecDeque::new()), + wait: WaitList::new(), +}; - if let Some(clear_ctid) = thread.get_clear_ctid() { - let _ = UserPointerMut::new(clear_ctid).unwrap().write(0u32); +impl Reaper { + async fn reap(&self, thread: Arc) { + let exit_status = thread + .exit_status + .lock() + .take() + .expect("Exited thread with no exit status"); - let _ = futex_wake(clear_ctid.addr(), None, 1).await; + let process = &thread.process; + + if process.pid == 1 && thread.tid == process.pid { + panic!("init exited"); } - if let Some(robust_list) = thread.get_robust_list() { - let _ = robust_list.wake_all().await; + let mut procs = ProcessList::get().write().await; + + let inner = process.inner.access_mut(procs.prove_mut()); + + thread.dead.store(true, Ordering::SeqCst); + + if thread.tid != process.pid { + procs.threads.remove(&thread.tid); + inner.threads.remove(&thread.tid).unwrap(); } // main thread exit @@ -151,48 +173,62 @@ impl ProcessList { thread.files.close_all().await; // If we are the session leader, we should drop the control terminal. - if process.session(self.prove()).sid == process.pid { - if let Some(terminal) = process.session(self.prove()).drop_control_terminal().await + if process.session(procs.prove()).sid == process.pid { + if let Some(terminal) = process.session(procs.prove()).drop_control_terminal().await { terminal.drop_session().await; } } // Release the MMList as well as the page table. - unsafe { - // SAFETY: We are exiting the process, so no one might be using it. - process.mm_list.replace(None); - } + process.mm_list.release(); // Make children orphans (adopted by init) { - let init = self.init_process(); + let init = procs.init_process(); inner.children.retain(|_, child| { let child = child.upgrade().unwrap(); // SAFETY: `child.parent` must be ourself. So we don't need to free it. 
unsafe { child.parent.swap(Some(init.clone())) }; - init.add_child(&child, self.prove_mut()); + init.add_child(&child, procs.prove_mut()); false }); } - let mut init_notify = self.init_process().notify_batch(); + let mut init_notify = procs.init_process().notify_batch(); process .wait_list .drain_exited() .into_iter() .for_each(|item| init_notify.notify(item)); - init_notify.finish(self.prove()); + init_notify.finish(procs.prove()); - process.parent(self.prove()).notify( + process.parent(procs.prove()).notify( process.exit_signal, WaitObject { pid: process.pid, code: exit_status, }, - self.prove(), + procs.prove(), ); } } + + async fn daemon() { + let me = &GLOBAL_REAPER; + + loop { + let mut wait = pin!(me.wait.prepare_to_wait()); + wait.as_mut().add_to_wait_list(); + + let thd_to_reap = me.reap_list.lock().pop_front(); + if let Some(thd_to_reap) = thd_to_reap { + me.reap(thd_to_reap).await; + continue; + } + + wait.await; + } + } } diff --git a/src/kernel/task/signal.rs b/src/kernel/task/signal.rs index d9970cad..0a7b580d 100644 --- a/src/kernel/task/signal.rs +++ b/src/kernel/task/signal.rs @@ -1,11 +1,10 @@ mod signal_action; -use super::{ProcessList, Thread, WaitObject, WaitType}; -use crate::kernel::constants::{EFAULT, EINVAL}; -use crate::{kernel::user::UserPointer, prelude::*}; use alloc::collections::binary_heap::BinaryHeap; use alloc::sync::Arc; -use core::{cmp::Reverse, task::Waker}; +use core::cmp::Reverse; +use core::task::Waker; + use eonix_hal::fpu::FpuState; use eonix_hal::traits::trap::RawTrapContext; use eonix_hal::trap::TrapContext; @@ -14,9 +13,13 @@ use eonix_sync::AsProof as _; use intrusive_collections::UnsafeRef; use posix_types::signal::{SigSet, Signal}; use posix_types::{SIGNAL_IGNORE, SIGNAL_NOW, SIGNAL_STOP}; +pub use signal_action::SignalAction; use signal_action::SignalActionList; -pub use signal_action::SignalAction; +use super::{ProcessList, Thread, WaitObject, WaitType}; +use crate::kernel::constants::{EFAULT, EINVAL}; +use crate::kernel::user::UserPointer; +use crate::prelude::*; pub(self) const SAVED_DATA_SIZE: usize = size_of::() + size_of::() + size_of::(); @@ -168,10 +171,7 @@ impl SignalList { pub async fn handle(&self, trap_ctx: &mut TrapContext, fpu_state: &mut FpuState) { loop { let signal = { - let signal = match self.inner.lock().pop() { - Some(signal) => signal, - None => return, - }; + let Some(signal) = self.inner.lock().pop() else { return }; let handler = self.inner.lock().actions.get(signal); if let SignalAction::SimpleHandler { mask, .. } = &handler { @@ -246,7 +246,7 @@ impl SignalList { } signal => { // Default to terminate the thread. 
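A note on the `Reaper::daemon()` loop above: the waiter is added to the `WaitList` *before* the reap queue is re-checked, so a `notify_one()` from `send_to_reaper()` issued in between cannot be lost. The sketch below shows the same ordering with std primitives; it is illustrative only, with `Mutex`/`Condvar` standing in for the kernel's `Spin` and `WaitList` (a condvar gets the same guarantee by unlocking and sleeping atomically).

    use std::collections::VecDeque;
    use std::sync::{Condvar, Mutex};

    struct ReapQueue<T> {
        items: Mutex<VecDeque<T>>,
        cond: Condvar,
    }

    impl<T> ReapQueue<T> {
        /// Producer side, analogous to `send_to_reaper()`.
        fn push(&self, item: T) {
            self.items.lock().unwrap().push_back(item);
            self.cond.notify_one();
        }

        /// Consumer side, analogous to the daemon loop: the queue check
        /// and the sleep are tied together through the lock, so a wakeup
        /// posted after the check cannot slip through unnoticed.
        fn pop_blocking(&self) -> T {
            let mut items = self.items.lock().unwrap();
            loop {
                if let Some(item) = items.pop_front() {
                    return item;
                }
                // Atomically releases the lock and sleeps.
                items = self.cond.wait(items).unwrap();
            }
        }
    }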
- Thread::current().force_kill(signal).await; + Thread::current().force_kill(signal); return; } } diff --git a/src/kernel/task/thread.rs b/src/kernel/task/thread.rs index 77e8e618..7e005875 100644 --- a/src/kernel/task/thread.rs +++ b/src/kernel/task/thread.rs @@ -24,8 +24,7 @@ use super::{stackful, Process, ProcessList, WaitType}; use crate::kernel::interrupt::default_irq_handler; use crate::kernel::syscall::{syscall_handlers, SyscallHandler, User, UserMut}; use crate::kernel::task::clone::CloneArgs; -use crate::kernel::task::futex::RobustListHead; -use crate::kernel::task::CloneFlags; +use crate::kernel::task::{futex_exit, CloneFlags, RobustListHead}; use crate::kernel::timer::{should_reschedule, timer_interrupt}; use crate::kernel::user::{UserPointer, UserPointerMut}; use crate::kernel::vfs::filearray::FileArray; @@ -83,6 +82,7 @@ pub struct Thread { pub fpu_state: AtomicUniqueRefCell, pub dead: AtomicBool, + pub exit_status: Spin>, inner: Spin, } @@ -240,6 +240,7 @@ impl ThreadBuilder { trap_ctx: AtomicUniqueRefCell::new(trap_ctx), fpu_state: AtomicUniqueRefCell::new(fpu_state), dead: AtomicBool::new(false), + exit_status: Spin::new(None), inner: Spin::new(ThreadInner { name, tls: self.tls, @@ -331,18 +332,26 @@ impl Thread { } } - pub async fn force_kill(&self, signal: Signal) { - let mut proc_list = ProcessList::get().write().await; - unsafe { - // SAFETY: Preemption is disabled. - proc_list - .do_exit(self, WaitType::Signaled(signal), false) - .await; + pub fn exit(&self, exit_status: WaitType) { + { + let mut self_status = self.exit_status.lock(); + if self_status.is_some() { + // Someone has got here before us. + return; + } + + *self_status = Some(exit_status); } + + self.dead.store(true, Ordering::Release); + } + + pub fn force_kill(&self, signal: Signal) { + self.exit(WaitType::Signaled(signal)); } pub fn is_dead(&self) -> bool { - self.dead.load(Ordering::SeqCst) + self.dead.load(Ordering::Acquire) } async fn real_run(&self) { @@ -385,6 +394,10 @@ impl Thread { error_code, address: addr, }) => { + if self.is_dead() { + return; + } + let mms = &self.process.mm_list; if let Err(signal) = mms.handle_user_page_fault(addr, error_code).await { self.signal_list.raise(signal); @@ -407,6 +420,10 @@ impl Thread { } } TrapType::Syscall { no, args } => { + if self.is_dead() { + return; + } + if let Some(retval) = self.handle_syscall(thd_alloc, no, args).await { let mut trap_ctx = self.trap_ctx.borrow(); trap_ctx.set_user_return_value(retval); @@ -447,7 +464,17 @@ impl Thread { } pub fn run(self: Arc) -> impl Future + Send + 'static { - async move { self.contexted(stackful(self.real_run())).await } + async move { + self.contexted(async { + stackful(self.real_run()).await; + + futex_exit(&self).await; + }) + .await; + + assert!(self.is_dead(), "`real_run` returned before the thread die?"); + ProcessList::send_to_reaper(self); + } } } diff --git a/src/lib.rs b/src/lib.rs index 4f7fb262..2e28db24 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -31,7 +31,6 @@ mod rcu; mod sync; use alloc::ffi::CString; -use alloc::sync::Arc; use core::hint::spin_loop; use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; @@ -46,7 +45,7 @@ use eonix_mm::address::PRange; use eonix_runtime::executor::Stack; use eonix_runtime::scheduler::RUNTIME; use kernel::mem::GlobalPageAlloc; -use kernel::task::{KernelStack, ProcessBuilder, ProcessList, ProgramLoader, ThreadBuilder}; +use kernel::task::{KernelStack, ProcessList, ProgramLoader}; use kernel::vfs::dentry::Dentry; use kernel::vfs::mount::{do_mount, MS_NOATIME, 
MS_NODEV, MS_NOSUID, MS_RDONLY};
 use kernel::vfs::types::Permission;
@@ -56,8 +55,6 @@ use kernel_init::setup_memory;
 use path::Path;
 use prelude::*;
 
-use crate::kernel::task::alloc_pid;
-
 #[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))]
 fn do_panic() -> ! {
     #[cfg(target_arch = "riscv64")]
@@ -276,21 +273,5 @@ async fn init_process(early_kstack: PRange) {
             .expect("Failed to load init program")
     };
 
-    let thread_builder = ThreadBuilder::new()
-        .name(Arc::from(&b"busybox"[..]))
-        .entry(load_info.entry_ip, load_info.sp);
-
-    let mut process_list = ProcessList::get().write().await;
-    let (thread, process) = ProcessBuilder::new()
-        .pid(alloc_pid())
-        .mm_list(load_info.mm_list)
-        .thread_builder(thread_builder)
-        .build(&mut process_list);
-
-    process_list.set_init_process(process);
-
-    // TODO!!!: Remove this.
-    thread.files.open_console();
-
-    RUNTIME.spawn(thread.run());
+    ProcessList::sys_init(load_info).await;
 }

From 997edb05a2f286c97e49cf155eca7270fed79e05 Mon Sep 17 00:00:00 2001
From: greatbridf
Date: Wed, 21 Jan 2026 00:41:58 +0800
Subject: [PATCH 16/25] proc: rewrite process list organization

- Use intrusive lists to store and organize the process hierarchy.
- Remove `FileArray::open_console()`. Do it in the init script instead.
- Fix the open logic: acquire the controlling terminal only if O_NOCTTY
  is not set. Move this logic into TerminalFile::open().
- Send SIGHUP and then SIGCONT to the foreground pgroup's procs when the
  controlling terminal is dropped.
- Set the controlling terminal of sessions in Terminal.
- Limit the max line width to 80 and format the affected code.

Signed-off-by: greatbridf
---
 .rustfmt.toml                        |   4 +-
 Cargo.toml                           |   4 +-
 crates/posix_types/src/open.rs       |   6 +-
 src/kernel/chardev.rs                |  84 ++++-----
 src/kernel/syscall/file_rw.rs        | 144 +++++++++++---
 src/kernel/task/process.rs           | 270 ++++++++++++++++++---------
 src/kernel/task/process_group.rs     | 146 +++++++++------
 src/kernel/task/process_list.rs      | 151 ++++++++-------
 src/kernel/task/session.rs           | 187 ++++++++++++-------
 src/kernel/task/thread.rs            | 106 +++++++++--
 src/kernel/terminal.rs               | 156 ++++++++++------
 src/kernel/vfs/file/terminal_file.rs |  68 +++++--
 src/kernel/vfs/filearray.rs          |  88 ++++-----
 user-programs/init_script_riscv64.sh |   6 +-
 14 files changed, 927 insertions(+), 493 deletions(-)

diff --git a/.rustfmt.toml b/.rustfmt.toml
index 17b2bbc5..85b1cfc7 100644
--- a/.rustfmt.toml
+++ b/.rustfmt.toml
@@ -1,4 +1,4 @@
-max_width = 100
+max_width = 80
 hard_tabs = false
 tab_spaces = 4
 newline_style = "Auto"
@@ -14,7 +14,7 @@ single_line_if_else_max_width = 60
 single_line_let_else_max_width = 60
 wrap_comments = false
 format_code_in_doc_comments = false
-doc_comment_code_block_width = 100
+doc_comment_code_block_width = 80
 comment_width = 80
 normalize_comments = false
 normalize_doc_attributes = false
diff --git a/Cargo.toml b/Cargo.toml
index 4fcb6f70..214e5941 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -27,8 +27,10 @@ pointers = { path = "./crates/pointers" }
 posix_types = { path = "./crates/posix_types" }
 slab_allocator = { path = "./crates/slab_allocator" }
 
+intrusive-collections = { version = "0.9.8", features = [
+    "nightly",
+], git = "https://github.com/greatbridf/intrusive-rs" }
 bitflags = "2.6.0"
-intrusive-collections = { version = "0.9.8", git = "https://github.com/greatbridf/intrusive-rs" }
 itertools = { version = "0.13.0", default-features = false }
 acpi = "5.2.0"
 align_ext = "0.1.0"
diff --git a/crates/posix_types/src/open.rs b/crates/posix_types/src/open.rs
index 758ea331..7135e5b8 100644
---
a/crates/posix_types/src/open.rs +++ b/crates/posix_types/src/open.rs @@ -11,6 +11,8 @@ bitflags! { const O_CREAT = 0x40; /// Exclusive access, fail if file exists const O_EXCL = 0x80; + /// Don't set controlling terminal. + const O_NOCTTY = 0x100; /// Truncate file to zero length if it exists const O_TRUNC = 0x200; /// Open file in append mode @@ -116,6 +118,8 @@ impl AtFlags { } pub fn statx_default_sync(&self) -> bool { - !self.intersects(AtFlags::AT_STATX_FORCE_SYNC | AtFlags::AT_STATX_DONT_SYNC) + !self.intersects( + AtFlags::AT_STATX_FORCE_SYNC | AtFlags::AT_STATX_DONT_SYNC, + ) } } diff --git a/src/kernel/chardev.rs b/src/kernel/chardev.rs index 4e01d83a..e4a6e1b3 100644 --- a/src/kernel/chardev.rs +++ b/src/kernel/chardev.rs @@ -1,22 +1,18 @@ -use super::{ - console::get_console, - constants::{EEXIST, EIO}, - task::{block_on, ProcessList, Thread}, - terminal::Terminal, - vfs::{types::DeviceId, File, FileType, TerminalFile}, -}; -use crate::{ - io::{Buffer, Stream, StreamRead}, - prelude::*, -}; -use alloc::{ - boxed::Box, - collections::btree_map::{BTreeMap, Entry}, - sync::Arc, -}; -use eonix_sync::AsProof as _; +use alloc::boxed::Box; +use alloc::collections::btree_map::{BTreeMap, Entry}; +use alloc::sync::Arc; + use posix_types::open::OpenFlags; +use super::console::get_console; +use super::constants::{EEXIST, EIO}; +use super::task::{block_on, Thread}; +use super::terminal::Terminal; +use super::vfs::types::DeviceId; +use super::vfs::{File, FileType, TerminalFile}; +use crate::io::{Buffer, Stream, StreamRead}; +use crate::prelude::*; + pub trait VirtualCharDevice: Send + Sync { fn read(&self, buffer: &mut dyn Buffer) -> KResult; fn write(&self, stream: &mut dyn Stream) -> KResult; @@ -33,12 +29,15 @@ pub struct CharDevice { device: CharDeviceType, } -static CHAR_DEVICES: Spin>> = Spin::new(BTreeMap::new()); +static CHAR_DEVICES: Spin>> = + Spin::new(BTreeMap::new()); impl CharDevice { pub fn read(&self, buffer: &mut dyn Buffer) -> KResult { match &self.device { - CharDeviceType::Terminal(terminal) => block_on(terminal.read(buffer)), + CharDeviceType::Terminal(terminal) => { + block_on(terminal.read(buffer)) + } CharDeviceType::Virtual(device) => device.read(buffer), } } @@ -46,10 +45,12 @@ impl CharDevice { pub fn write(&self, stream: &mut dyn Stream) -> KResult { match &self.device { CharDeviceType::Virtual(device) => device.write(stream), - CharDeviceType::Terminal(terminal) => stream.read_till_end(&mut [0; 128], |data| { - terminal.write(data); - Ok(()) - }), + CharDeviceType::Terminal(terminal) => { + stream.read_till_end(&mut [0; 128], |data| { + terminal.write(data); + Ok(()) + }) + } } } @@ -57,7 +58,11 @@ impl CharDevice { CHAR_DEVICES.lock().get(&devid).cloned() } - pub fn register(devid: DeviceId, name: Arc, device: CharDeviceType) -> KResult<()> { + pub fn register( + devid: DeviceId, + name: Arc, + device: CharDeviceType, + ) -> KResult<()> { match CHAR_DEVICES.lock().entry(devid) { Entry::Vacant(entry) => { entry.insert(Arc::new(CharDevice { name, device })); @@ -67,26 +72,21 @@ impl CharDevice { } } - pub fn open(self: &Arc, flags: OpenFlags) -> KResult { - Ok(match &self.device { + pub async fn open( + self: &Arc, + thread: &Thread, + flags: OpenFlags, + ) -> KResult { + let file = match &self.device { + CharDeviceType::Virtual(_) => { + File::new(flags, FileType::CharDev(self.clone())) + } CharDeviceType::Terminal(terminal) => { - let procs = block_on(ProcessList::get().read()); - let current = Thread::current(); - let session = 
current.process.session(procs.prove()); - // We only set the control terminal if the process is the session leader. - if session.sid == Thread::current().process.pid { - // Silently fail if we can't set the control terminal. - dont_check!(block_on(session.set_control_terminal( - &terminal, - false, - procs.prove() - ))); - } - - TerminalFile::new(terminal.clone(), flags) + TerminalFile::open(thread, terminal, flags).await } - CharDeviceType::Virtual(_) => File::new(flags, FileType::CharDev(self.clone())), - }) + }; + + Ok(file) } } diff --git a/src/kernel/syscall/file_rw.rs b/src/kernel/syscall/file_rw.rs index 93a543d7..8ac9c22a 100644 --- a/src/kernel/syscall/file_rw.rs +++ b/src/kernel/syscall/file_rw.rs @@ -12,7 +12,8 @@ use posix_types::syscall_no::*; use super::{FromSyscallArg, User}; use crate::io::{Buffer, BufferFill, IntoStream}; use crate::kernel::constants::{ - EBADF, EFAULT, EINVAL, ENOENT, ENOSYS, ENOTDIR, SEEK_CUR, SEEK_END, SEEK_SET, + EBADF, EFAULT, EINVAL, ENOENT, ENOSYS, ENOTDIR, SEEK_CUR, SEEK_END, + SEEK_SET, }; use crate::kernel::syscall::UserMut; use crate::kernel::task::Thread; @@ -61,7 +62,13 @@ async fn dentry_from( let dir_file = thread.files.get(dirfd).ok_or(EBADF)?; let dir_dentry = dir_file.as_path().ok_or(ENOTDIR)?; - Dentry::open_at(&thread.fs_context, dir_dentry, path, follow_symlink).await + Dentry::open_at( + &thread.fs_context, + dir_dentry, + path, + follow_symlink, + ) + .await } } } @@ -79,7 +86,12 @@ async fn read(fd: FD, buffer: UserMut, bufsize: usize) -> KResult { } #[eonix_macros::define_syscall(SYS_PREAD64)] -async fn pread64(fd: FD, buffer: UserMut, bufsize: usize, offset: usize) -> KResult { +async fn pread64( + fd: FD, + buffer: UserMut, + bufsize: usize, + offset: usize, +) -> KResult { let mut buffer = UserBuffer::new(buffer, bufsize)?; thread @@ -104,7 +116,12 @@ async fn write(fd: FD, buffer: User, count: usize) -> KResult { } #[eonix_macros::define_syscall(SYS_PWRITE64)] -async fn pwrite64(fd: FD, buffer: User, count: usize, offset: usize) -> KResult { +async fn pwrite64( + fd: FD, + buffer: User, + count: usize, + offset: usize, +) -> KResult { let buffer = CheckedUserPointer::new(buffer, count)?; let mut stream = buffer.into_stream(); @@ -117,11 +134,17 @@ async fn pwrite64(fd: FD, buffer: User, count: usize, offset: usize) -> KRes } #[eonix_macros::define_syscall(SYS_OPENAT)] -async fn openat(dirfd: FD, pathname: User, flags: OpenFlags, mode: Mode) -> KResult { - let dentry = dentry_from(thread, dirfd, pathname, flags.follow_symlink()).await?; +async fn openat( + dirfd: FD, + pathname: User, + flags: OpenFlags, + mode: Mode, +) -> KResult { + let dentry = + dentry_from(thread, dirfd, pathname, flags.follow_symlink()).await?; let perm = mode.perm().mask_with(*thread.fs_context.umask.lock()); - thread.files.open(&dentry, flags, perm).await + thread.files.open(thread, &dentry, flags, perm).await } #[cfg(target_arch = "x86_64")] @@ -156,7 +179,8 @@ async fn dup3(old_fd: FD, new_fd: FD, flags: OpenFlags) -> KResult { #[eonix_macros::define_syscall(SYS_PIPE2)] async fn pipe2(pipe_fd: UserMut<[FD; 2]>, flags: OpenFlags) -> KResult<()> { - let mut buffer = UserBuffer::new(pipe_fd.cast(), core::mem::size_of::<[FD; 2]>())?; + let mut buffer = + UserBuffer::new(pipe_fd.cast(), core::mem::size_of::<[FD; 2]>())?; let (read_fd, write_fd) = thread.files.pipe(flags)?; buffer.copy(&[read_fd, write_fd])?.ok_or(EFAULT) @@ -170,7 +194,11 @@ async fn pipe(pipe_fd: UserMut<[FD; 2]>) -> KResult<()> { #[cfg(target_arch = "x86_64")] 
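Regarding the `TerminalFile::open(thread, terminal, flags)` call introduced above: its body lives in src/kernel/vfs/file/terminal_file.rs and is not part of this hunk. What the commit message describes is the usual POSIX acquisition rule; below is a self-contained sketch of that rule, where all names and parameters are illustrative, not the kernel's actual API.

    /// Sketch only. POSIX job control: an open() acquires the terminal
    /// as the controlling terminal iff O_NOCTTY is clear, the caller is
    /// a session leader, its session has no controlling terminal yet,
    /// and the terminal is not already attached to another session.
    fn should_acquire_ctty(
        o_noctty_set: bool,
        caller_is_session_leader: bool,
        session_has_ctty: bool,
        terminal_already_attached: bool,
    ) -> bool {
        !o_noctty_set
            && caller_is_session_leader
            && !session_has_ctty
            && !terminal_already_attached
    }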
#[eonix_macros::define_syscall(SYS_GETDENTS)] -async fn getdents(fd: FD, buffer: UserMut, bufsize: usize) -> KResult { +async fn getdents( + fd: FD, + buffer: UserMut, + bufsize: usize, +) -> KResult { let mut buffer = UserBuffer::new(buffer, bufsize)?; thread @@ -184,7 +212,11 @@ async fn getdents(fd: FD, buffer: UserMut, bufsize: usize) -> KResult } #[eonix_macros::define_syscall(SYS_GETDENTS64)] -async fn getdents64(fd: FD, buffer: UserMut, bufsize: usize) -> KResult { +async fn getdents64( + fd: FD, + buffer: UserMut, + bufsize: usize, +) -> KResult { let mut buffer = UserBuffer::new(buffer, bufsize)?; thread @@ -230,7 +262,8 @@ async fn newfstatat( )] #[cfg_attr(target_arch = "x86_64", eonix_macros::define_syscall(SYS_FSTAT64))] async fn newfstat(fd: FD, statbuf: UserMut) -> KResult<()> { - sys_newfstatat(thread, fd, User::null(), statbuf, AtFlags::AT_EMPTY_PATH).await + sys_newfstatat(thread, fd, User::null(), statbuf, AtFlags::AT_EMPTY_PATH) + .await } #[eonix_macros::define_syscall(SYS_STATX)] @@ -307,7 +340,11 @@ async fn unlink(pathname: User) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_SYMLINKAT)] -async fn symlinkat(target: User, dirfd: FD, linkpath: User) -> KResult<()> { +async fn symlinkat( + target: User, + dirfd: FD, + linkpath: User, +) -> KResult<()> { let target = UserString::new(target)?; let dentry = dentry_from(thread, dirfd, linkpath, false).await?; @@ -341,7 +378,12 @@ impl UserDeviceId { } #[eonix_macros::define_syscall(SYS_MKNODAT)] -async fn mknodat(dirfd: FD, pathname: User, mut mode: Mode, dev: UserDeviceId) -> KResult<()> { +async fn mknodat( + dirfd: FD, + pathname: User, + mut mode: Mode, + dev: UserDeviceId, +) -> KResult<()> { if !mode.is_blk() && !mode.is_chr() { return Err(EINVAL); } @@ -354,7 +396,11 @@ async fn mknodat(dirfd: FD, pathname: User, mut mode: Mode, dev: UserDeviceI #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_MKNOD)] -async fn mknod(pathname: User, mode: Mode, dev: UserDeviceId) -> KResult<()> { +async fn mknod( + pathname: User, + mode: Mode, + dev: UserDeviceId, +) -> KResult<()> { sys_mknodat(thread, FD::AT_FDCWD, pathname, mode, dev).await } @@ -373,11 +419,20 @@ async fn readlinkat( #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_READLINK)] -async fn readlink(pathname: User, buffer: UserMut, bufsize: usize) -> KResult { +async fn readlink( + pathname: User, + buffer: UserMut, + bufsize: usize, +) -> KResult { sys_readlinkat(thread, FD::AT_FDCWD, pathname, buffer, bufsize).await } -async fn do_lseek(thread: &Thread, fd: FD, offset: u64, whence: u32) -> KResult { +async fn do_lseek( + thread: &Thread, + fd: FD, + offset: u64, + whence: u32, +) -> KResult { let file = thread.files.get(fd).ok_or(EBADF)?; Ok(match whence { @@ -403,7 +458,8 @@ async fn llseek( result: UserMut, whence: u32, ) -> KResult<()> { - let mut result = UserBuffer::new(result.cast(), core::mem::size_of::())?; + let mut result = + UserBuffer::new(result.cast(), core::mem::size_of::())?; let offset = ((offset_high as u64) << 32) | (offset_low as u64); let new_offset = do_lseek(thread, fd, offset, whence).await?; @@ -434,9 +490,10 @@ async fn readv(fd: FD, iov_user: User, iovcnt: u32) -> KResult { Ok(IoVec { len: Long::ZERO, .. 
}) => None, - Ok(IoVec { base, len }) => { - Some(UserBuffer::new(UserMut::with_addr(base.addr()), len.get())) - } + Ok(IoVec { base, len }) => Some(UserBuffer::new( + UserMut::with_addr(base.addr()), + len.get(), + )), }) .collect::>>()?; @@ -471,8 +528,11 @@ async fn writev(fd: FD, iov_user: User, iovcnt: u32) -> KResult { len: Long::ZERO, .. }) => None, Ok(IoVec { base, len }) => Some( - CheckedUserPointer::new(User::with_addr(base.addr()), len.get()) - .map(|ptr| ptr.into_stream()), + CheckedUserPointer::new( + User::with_addr(base.addr()), + len.get(), + ) + .map(|ptr| ptr.into_stream()), ), }) .collect::>>()?; @@ -491,7 +551,12 @@ async fn writev(fd: FD, iov_user: User, iovcnt: u32) -> KResult { } #[eonix_macros::define_syscall(SYS_FACCESSAT)] -async fn faccessat(dirfd: FD, pathname: User, _mode: u32, flags: AtFlags) -> KResult<()> { +async fn faccessat( + dirfd: FD, + pathname: User, + _mode: u32, + flags: AtFlags, +) -> KResult<()> { let dentry = if flags.at_empty_path() { let file = thread.files.get(dirfd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.clone() @@ -522,7 +587,12 @@ async fn access(pathname: User, mode: u32) -> KResult<()> { } #[eonix_macros::define_syscall(SYS_SENDFILE64)] -async fn sendfile64(out_fd: FD, in_fd: FD, offset: UserMut, count: usize) -> KResult { +async fn sendfile64( + out_fd: FD, + in_fd: FD, + offset: UserMut, + count: usize, +) -> KResult { let in_file = thread.files.get(in_fd).ok_or(EBADF)?; let out_file = thread.files.get(out_fd).ok_or(EBADF)?; @@ -627,7 +697,11 @@ async fn pselect6( #[cfg(target_arch = "x86_64")] #[eonix_macros::define_syscall(SYS_POLL)] -async fn poll(fds: UserMut, nfds: u32, timeout: u32) -> KResult { +async fn poll( + fds: UserMut, + nfds: u32, + timeout: u32, +) -> KResult { do_poll(thread, fds, nfds, timeout).await } @@ -639,7 +713,8 @@ async fn fchownat( gid: u32, flags: AtFlags, ) -> KResult<()> { - let dentry = dentry_from(thread, dirfd, pathname, !flags.no_follow()).await?; + let dentry = + dentry_from(thread, dirfd, pathname, !flags.no_follow()).await?; if !dentry.is_valid() { return Err(ENOENT); } @@ -648,7 +723,12 @@ async fn fchownat( } #[eonix_macros::define_syscall(SYS_FCHMODAT)] -async fn fchmodat(dirfd: FD, pathname: User, mode: Mode, flags: AtFlags) -> KResult<()> { +async fn fchmodat( + dirfd: FD, + pathname: User, + mode: Mode, + flags: AtFlags, +) -> KResult<()> { let dentry = if flags.at_empty_path() { let file = thread.files.get(dirfd).ok_or(EBADF)?; file.as_path().ok_or(EBADF)?.clone() @@ -709,12 +789,16 @@ async fn renameat2( let flags = RenameFlags::from_bits(flags).ok_or(EINVAL)?; // The two flags RENAME_NOREPLACE and RENAME_EXCHANGE are mutually exclusive. 
- if flags.contains(RenameFlags::RENAME_NOREPLACE | RenameFlags::RENAME_EXCHANGE) { + if flags + .contains(RenameFlags::RENAME_NOREPLACE | RenameFlags::RENAME_EXCHANGE) + { Err(EINVAL)?; } - let old_dentry = dentry_from(thread, old_dirfd, old_pathname, false).await?; - let new_dentry = dentry_from(thread, new_dirfd, new_pathname, false).await?; + let old_dentry = + dentry_from(thread, old_dirfd, old_pathname, false).await?; + let new_dentry = + dentry_from(thread, new_dirfd, new_pathname, false).await?; old_dentry.rename(&new_dentry, flags).await } diff --git a/src/kernel/task/process.rs b/src/kernel/task/process.rs index 3eff5949..1385235d 100644 --- a/src/kernel/task/process.rs +++ b/src/kernel/task/process.rs @@ -1,22 +1,24 @@ -use alloc::collections::btree_map::BTreeMap; use alloc::collections::vec_deque::VecDeque; -use alloc::sync::{Arc, Weak}; +use alloc::sync::Arc; use core::sync::atomic::{AtomicU32, Ordering}; use eonix_sync::{ - AsProof as _, AsProofMut as _, Locked, Proof, ProofMut, RwLockReadGuard, SpinGuard, - UnlockableGuard as _, UnlockedGuard as _, + AsProof as _, AsProofMut as _, Locked, Proof, ProofMut, RwLockReadGuard, + SpinGuard, UnlockableGuard as _, UnlockedGuard as _, +}; +use intrusive_collections::{ + intrusive_adapter, KeyAdapter, RBTree, RBTreeAtomicLink, }; use pointers::BorrowedArc; use posix_types::constants::{ - CLD_CONTINUED, CLD_DUMPED, CLD_EXITED, CLD_KILLED, CLD_STOPPED, P_ALL, P_PGID, P_PID, P_PIDFD, + CLD_CONTINUED, CLD_DUMPED, CLD_EXITED, CLD_KILLED, CLD_STOPPED, P_ALL, + P_PGID, P_PID, P_PIDFD, }; use posix_types::signal::Signal; use posix_types::SIGNAL_COREDUMP; -use super::process_group::ProcessGroupBuilder; use super::signal::RaiseResult; -use super::thread::ThreadBuilder; +use super::thread::{ProcessThreads, ThreadBuilder}; use super::{ProcessGroup, ProcessList, Session, Thread}; use crate::kernel::constants::{ECHILD, EINTR, EINVAL, EPERM, ESRCH}; use crate::kernel::mem::MMList; @@ -35,7 +37,6 @@ pub struct ProcessBuilder { pid: Option, } -#[derive(Debug)] pub struct Process { /// Process id /// @@ -66,14 +67,55 @@ pub struct Process { /// The only case where it may be `None` is when the process is kernel thread. pub(super) session: RCUPointer, - /// All things related to the process list. 
- pub(super) inner: Locked, + pub children: Locked, ProcessList>, + pub threads: Locked, ProcessList>, + + all_procs_link: RBTreeAtomicLink, + group_procs_link: RBTreeAtomicLink, + siblings_link: RBTreeAtomicLink, } -#[derive(Debug)] -pub(super) struct ProcessInner { - pub(super) children: BTreeMap>, - pub(super) threads: BTreeMap>, +intrusive_adapter!(pub AllProcs = Arc: Process { + all_procs_link: RBTreeAtomicLink +}); +intrusive_adapter!(pub GroupProcs = Arc: Process { + group_procs_link: RBTreeAtomicLink +}); +intrusive_adapter!(pub ProcessChildren = Arc: Process { + siblings_link: RBTreeAtomicLink +}); + +impl KeyAdapter<'_> for AllProcs { + type Key = u32; + + fn get_key( + &self, + value: &'_ ::Value, + ) -> Self::Key { + value.pid + } +} + +impl KeyAdapter<'_> for GroupProcs { + type Key = u32; + + fn get_key( + &self, + value: &'_ ::Value, + ) -> Self::Key { + value.pid + } +} + +impl KeyAdapter<'_> for ProcessChildren { + type Key = u32; + + fn get_key( + &self, + value: &'_ ::Value, + ) -> Self::Key { + value.pid + } } #[derive(Debug)] @@ -148,7 +190,9 @@ impl WaitType { pub fn to_wstatus(self) -> u32 { match self { WaitType::Exited(status) => (status & 0xff) << 8, - WaitType::Signaled(signal @ SIGNAL_COREDUMP!()) => signal.into_raw() | 0x80, + WaitType::Signaled(signal @ SIGNAL_COREDUMP!()) => { + signal.into_raw() | 0x80 + } WaitType::Signaled(signal) => signal.into_raw(), WaitType::Stopped(signal) => 0x7f | (signal.into_raw() << 8), WaitType::Continued => 0xffff, @@ -159,7 +203,9 @@ impl WaitType { // TODO: CLD_TRAPPED match self { WaitType::Exited(status) => (status, CLD_EXITED), - WaitType::Signaled(signal @ SIGNAL_COREDUMP!()) => (signal.into_raw(), CLD_DUMPED), + WaitType::Signaled(signal @ SIGNAL_COREDUMP!()) => { + (signal.into_raw(), CLD_DUMPED) + } WaitType::Signaled(signal) => (signal.into_raw(), CLD_KILLED), WaitType::Stopped(signal) => (signal.into_raw(), CLD_STOPPED), WaitType::Continued => (Signal::SIGCONT.into_raw(), CLD_CONTINUED), @@ -194,7 +240,11 @@ impl ProcessBuilder { } } - pub async fn clone_from(mut self, process: Arc, clone_args: &CloneArgs) -> Self { + pub async fn clone_from( + mut self, + process: Arc, + clone_args: &CloneArgs, + ) -> Self { let mm_list = if clone_args.flags.contains(CloneFlags::CLONE_VM) { process.mm_list.new_shared().await } else { @@ -243,7 +293,10 @@ impl ProcessBuilder { self } - pub fn build(self, process_list: &mut ProcessList) -> (Arc, Arc) { + pub fn build( + self, + process_list: &mut ProcessList, + ) -> (Arc, Arc) { let mm_list = self.mm_list.unwrap_or_else(|| MMList::new()); let process = Arc::new(Process { @@ -254,18 +307,23 @@ impl ProcessBuilder { parent: RCUPointer::empty(), pgroup: RCUPointer::empty(), session: RCUPointer::empty(), - inner: Locked::new( - ProcessInner { - children: BTreeMap::new(), - threads: BTreeMap::new(), - }, + children: Locked::new( + RBTree::new(ProcessChildren::NEW), + process_list, + ), + threads: Locked::new( + RBTree::new(ProcessThreads::NEW), process_list, ), + all_procs_link: RBTreeAtomicLink::new(), + group_procs_link: RBTreeAtomicLink::new(), + siblings_link: RBTreeAtomicLink::new(), }); process_list.add_process(&process); - let thread_builder = self.thread_builder.expect("Thread builder is not set"); + let thread_builder = + self.thread_builder.expect("Thread builder is not set"); let thread = thread_builder .process(process.clone()) .tid(process.pid) @@ -281,10 +339,7 @@ impl ProcessBuilder { pgroup.add_member(&process, process_list.prove_mut()); pgroup } - None => 
ProcessGroupBuilder::new() - .leader(&process) - .session(session.clone()) - .build(process_list), + None => ProcessGroup::new(&process, &session, process_list), }; if let Some(parent) = &self.parent { @@ -304,30 +359,30 @@ impl ProcessBuilder { impl Process { pub fn raise(&self, signal: Signal, procs: Proof<'_, ProcessList>) { - let inner = self.inner.access(procs); - for thread in inner.threads.values().map(|t| t.upgrade().unwrap()) { + let threads = self.threads.access(procs); + for thread in threads.iter() { if let RaiseResult::Finished = thread.raise(signal) { break; } } } - pub(super) fn add_child(&self, child: &Arc, procs: ProofMut<'_, ProcessList>) { - assert!(self - .inner - .access_mut(procs) - .children - .insert(child.pid, Arc::downgrade(child)) - .is_none()); + pub fn add_child( + &self, + child: &Arc, + procs: ProofMut<'_, ProcessList>, + ) { + assert!(self.all_procs_link.is_linked(), "Dead process"); + self.children.access_mut(procs).insert(child.clone()); } - pub(super) fn add_thread(&self, thread: &Arc, procs: ProofMut<'_, ProcessList>) { - assert!(self - .inner - .access_mut(procs) - .threads - .insert(thread.tid, Arc::downgrade(thread)) - .is_none()); + pub fn add_thread( + &self, + thread: &Arc, + procs: ProofMut<'_, ProcessList>, + ) { + assert!(self.all_procs_link.is_linked(), "Dead process"); + self.threads.access_mut(procs).insert(thread.clone()); } pub async fn wait( @@ -354,12 +409,7 @@ impl Process { break object; } - if self - .inner - .access(waits.process_list.prove()) - .children - .is_empty() - { + if self.children.access(waits.process_list.prove()).is_empty() { return Err(ECHILD); } @@ -375,12 +425,12 @@ impl Process { Ok(Some(wait_object)) } else { let mut procs = ProcessList::get().write().await; - procs.remove_process(wait_object.pid).await; + procs.remove_process(wait_object.pid); assert!(self - .inner - .access_mut(procs.prove_mut()) .children - .remove(&wait_object.pid) + .access_mut(procs.prove_mut()) + .find_mut(&wait_object.pid) + .remove() .is_some()); Ok(Some(wait_object)) @@ -396,15 +446,17 @@ impl Process { if process_list.try_find_session(self.pid).is_some() { return Err(EPERM); } + + self.pgroup(process_list.prove()) + .remove_member(self, &mut process_list); + let session = Session::new(self, &mut process_list); - let pgroup = ProcessGroupBuilder::new() - .leader(self) - .session(session.clone()) - .build(&mut process_list); + let pgroup = ProcessGroup::new(self, &session, &mut process_list); - let old_session = unsafe { self.session.swap(Some(session.clone())) }.unwrap(); - let old_pgroup = unsafe { self.pgroup.swap(Some(pgroup.clone())) }.unwrap(); - old_pgroup.remove_member(self.pid, process_list.prove_mut()); + let old_session = + unsafe { self.session.swap(Some(session.clone())) }.unwrap(); + let old_pgroup = + unsafe { self.pgroup.swap(Some(pgroup.clone())) }.unwrap(); call_rcu(move || { drop(old_session); @@ -417,47 +469,56 @@ impl Process { /// Set the process group id of the process to `pgid`. /// /// This function does the actual work. - fn do_setpgid(self: &Arc, pgid: u32, procs: &mut ProcessList) -> KResult<()> { + fn do_setpgid( + self: &Arc, + pgid: u32, + procs: &mut ProcessList, + ) -> KResult<()> { // SAFETY: We are holding the process list lock. let session = unsafe { self.session.load_locked().unwrap() }; - let pgroup = unsafe { self.pgroup.load_locked().unwrap() }; // Changing the process group of a session leader is not allowed. 
if session.sid == self.pid { return Err(EPERM); } - let new_pgroup = if let Some(new_pgroup) = procs.try_find_pgroup(pgid) { + let cur_pgroup = self.pgroup(procs.prove()).clone(); + let existing_pgroup = procs.try_find_pgroup(pgid); + + if let Some(new_pgroup) = &existing_pgroup { // Move us to an existing process group. // Check that the two groups are in the same session. - if new_pgroup.session.upgrade().unwrap().sid != session.sid { + if new_pgroup.session.sid != session.sid { return Err(EPERM); } // If we are already in the process group, we are done. - if new_pgroup.pgid == pgroup.pgid { + if new_pgroup.pgid == cur_pgroup.pgid { return Ok(()); } - - new_pgroup.add_member(self, procs.prove_mut()); - - new_pgroup } else { // Create a new process group only if `pgid` matches our `pid`. if pgid != self.pid { return Err(EPERM); } + } - ProcessGroupBuilder::new() - .leader(self) - .session(session.clone()) - .build(procs) - }; + // Permission checks done. Let's do the actual work. + cur_pgroup.remove_member(self, procs); - pgroup.remove_member(self.pid, procs.prove_mut()); + let new_pgroup; + if let Some(pgroup) = existing_pgroup { + pgroup.add_member(self, procs.prove_mut()); + new_pgroup = pgroup; + } else { + new_pgroup = ProcessGroup::new(self, &session, procs); + } - let old_pgroup = unsafe { self.pgroup.swap(Some(new_pgroup)) }.unwrap(); - call_rcu(move || drop(old_pgroup)); + unsafe { + // SAFETY: `cur_pgroup` held above. + self.pgroup.swap(Some(new_pgroup)); + } + call_rcu(move || drop(cur_pgroup)); Ok(()) } @@ -475,15 +536,14 @@ impl Process { let child = { // If `pid` refers to one of our children, the thread leaders must be // in out children list. - let children = &self.inner.access(procs.prove()).children; - let child = { - let child = children.get(&pid); - child.and_then(Weak::upgrade).ok_or(ESRCH)? - }; + let children = self.children.access(procs.prove()); + let child = children.find(&pid).clone_pointer().ok_or(ESRCH)?; // Changing the process group of a child is only allowed // if we are in the same session. - if child.session(procs.prove()).sid != self.session(procs.prove()).sid { + if child.session(procs.prove()).sid + != self.session(procs.prove()).sid + { return Err(EPERM); } @@ -497,19 +557,28 @@ impl Process { } /// Provide locked (consistent) access to the session. - pub fn session<'r>(&'r self, _procs: Proof<'r, ProcessList>) -> BorrowedArc<'r, Session> { + pub fn session<'r>( + &'r self, + _procs: Proof<'r, ProcessList>, + ) -> BorrowedArc<'r, Session> { // SAFETY: We are holding the process list lock. unsafe { self.session.load_locked() }.unwrap() } /// Provide locked (consistent) access to the process group. - pub fn pgroup<'r>(&'r self, _procs: Proof<'r, ProcessList>) -> BorrowedArc<'r, ProcessGroup> { + pub fn pgroup<'r>( + &'r self, + _procs: Proof<'r, ProcessList>, + ) -> BorrowedArc<'r, ProcessGroup> { // SAFETY: We are holding the process list lock. unsafe { self.pgroup.load_locked() }.unwrap() } /// Provide locked (consistent) access to the parent process. - pub fn parent<'r>(&'r self, _procs: Proof<'r, ProcessList>) -> BorrowedArc<'r, Process> { + pub fn parent<'r>( + &'r self, + _procs: Proof<'r, ProcessList>, + ) -> BorrowedArc<'r, Process> { // SAFETY: We are holding the process list lock. unsafe { self.parent.load_locked() }.unwrap() } @@ -520,16 +589,25 @@ impl Process { } /// Provide RCU locked (maybe inconsistent) access to the process group. 
- pub fn pgroup_rcu(&self) -> RCUReadGuard<'_, BorrowedArc<'_, ProcessGroup>> { + pub fn pgroup_rcu( + &self, + ) -> RCUReadGuard<'_, BorrowedArc<'_, ProcessGroup>> { self.pgroup.load().unwrap() } /// Provide RCU locked (maybe inconsistent) access to the parent process. - pub fn parent_rcu(&self) -> Option>> { + pub fn parent_rcu( + &self, + ) -> Option>> { self.parent.load() } - pub fn notify(&self, signal: Option, wait: WaitObject, procs: Proof<'_, ProcessList>) { + pub fn notify( + &self, + signal: Option, + wait: WaitObject, + procs: Proof<'_, ProcessList>, + ) { self.wait_list.notify(wait); if let Some(signal) = signal { @@ -607,8 +685,11 @@ impl Entry<'_, '_, '_> { WaitId::Any => true, WaitId::Pid(pid) => item.pid == pid, WaitId::Pgid(pgid) => { - if let Some(process) = self.process_list.try_find_process(item.pid) { - return process.pgroup(self.process_list.prove()).pgid == pgid; + if let Some(process) = + self.process_list.try_find_process(item.pid) + { + return process.pgroup(self.process_list.prove()).pgid + == pgid; } false } @@ -622,7 +703,10 @@ impl Entry<'_, '_, '_> { } } - pub fn wait(self, no_block: bool) -> impl core::future::Future> + Send { + pub fn wait( + self, + no_block: bool, + ) -> impl core::future::Future> + Send { let wait_procs = self.wait_procs.unlock(); async move { diff --git a/src/kernel/task/process_group.rs b/src/kernel/task/process_group.rs index 137c5191..8c708b5c 100644 --- a/src/kernel/task/process_group.rs +++ b/src/kernel/task/process_group.rs @@ -1,87 +1,121 @@ -use super::{Process, ProcessList, Session}; -use alloc::{ - collections::btree_map::BTreeMap, - sync::{Arc, Weak}, +use alloc::sync::{Arc, Weak}; + +use eonix_sync::{AsProofMut, Locked, Proof, ProofMut}; +use intrusive_collections::{ + intrusive_adapter, KeyAdapter, RBTree, RBTreeAtomicLink, }; -use eonix_sync::{Locked, Proof, ProofMut}; use posix_types::signal::Signal; -pub struct ProcessGroupBuilder { - pgid: Option, - leader: Option>, - session: Option>, -} +use super::process::GroupProcs; +use super::{Process, ProcessList, Session}; -#[derive(Debug)] pub struct ProcessGroup { pub pgid: u32, - pub _leader: Weak, - pub session: Weak, + pub leader: Weak, + pub session: Arc, - pub processes: Locked>, ProcessList>, + pub procs: Locked, ProcessList>, + + all_groups_link: RBTreeAtomicLink, + session_groups_link: RBTreeAtomicLink, } -impl ProcessGroupBuilder { - pub const fn new() -> Self { - Self { - pgid: None, - leader: None, - session: None, - } - } +intrusive_adapter!(pub AllGroups = Arc: ProcessGroup { + all_groups_link: RBTreeAtomicLink +}); +intrusive_adapter!(pub SessionGroups = Arc: ProcessGroup { + session_groups_link: RBTreeAtomicLink +}); + +impl KeyAdapter<'_> for AllGroups { + type Key = u32; - pub fn leader(mut self, leader: &Arc) -> Self { - self.pgid = Some(leader.pid); - self.leader = Some(Arc::downgrade(leader)); - self + fn get_key( + &self, + value: &'_ ::Value, + ) -> Self::Key { + value.pgid } +} + +impl KeyAdapter<'_> for SessionGroups { + type Key = u32; - pub fn session(mut self, session: Arc) -> Self { - self.session = Some(session); - self + fn get_key( + &self, + value: &'_ ::Value, + ) -> Self::Key { + value.pgid } +} - pub fn build(self, process_list: &mut ProcessList) -> Arc { - let pgid = self.pgid.expect("PGID is not set"); - let leader = self.leader.expect("Leader is not set"); - let session = self.session.expect("Session is not set"); +impl ProcessGroup { + /// Create a pgroup and add it to the global pgroup list. + /// Add the pgroup to the session. 
+ /// + /// # Panics + /// Panics if `leader` is already in some pgroup. + pub fn new( + leader: &Arc, + session: &Arc, + procs: &mut ProcessList, + ) -> Arc { + let pgid = leader.pid; + let pgroup_procs = { + let mut list = RBTree::new(GroupProcs::new()); + list.insert(leader.clone()); + list + }; let pgroup = Arc::new(ProcessGroup { pgid, - session: Arc::downgrade(&session), - processes: Locked::new(BTreeMap::from([(pgid, leader.clone())]), process_list), - _leader: leader, + session: session.clone(), + procs: Locked::new(pgroup_procs, procs), + leader: Arc::downgrade(leader), + all_groups_link: RBTreeAtomicLink::new(), + session_groups_link: RBTreeAtomicLink::new(), }); - process_list.add_pgroup(&pgroup); - session.add_member(process_list, &pgroup); + procs.add_pgroup(&pgroup); + session.add_member(&pgroup, procs.prove_mut()); pgroup } -} -impl ProcessGroup { - pub(super) fn add_member(&self, process: &Arc, procs: ProofMut<'_, ProcessList>) { - assert!(self - .processes - .access_mut(procs) - .insert(process.pid, Arc::downgrade(process)) - .is_none()); + /// Add `process` to the pgroup. + /// + /// # Panics + /// Panics if `process` is already in some pgroup or the pgroup is dead. + pub fn add_member( + &self, + process: &Arc, + procs: ProofMut<'_, ProcessList>, + ) { + assert!(self.all_groups_link.is_linked(), "Dead pgroup"); + self.procs.access_mut(procs).insert(process.clone()); } - pub(super) fn remove_member(&self, pid: u32, procs: ProofMut<'_, ProcessList>) { - let processes = self.processes.access_mut(procs); - assert!(processes.remove(&pid).is_some()); - if processes.is_empty() { - self.session - .upgrade() - .unwrap() - .remove_member(self.pgid, procs); + pub fn remove_member( + self: &Arc, + process: &Arc, + procs: &mut ProcessList, + ) { + let members = self.procs.access_mut(procs.prove_mut()); + assert!( + members.find_mut(&process.pid).remove().is_some(), + "Not a member" + ); + + if !members.is_empty() { + return; } + + self.session.remove_member(self, procs); + procs.remove_pgroup(self); } pub fn raise(&self, signal: Signal, procs: Proof<'_, ProcessList>) { - let processes = self.processes.access(procs); - for process in processes.values().map(|p| p.upgrade().unwrap()) { + let members = self.procs.access(procs); + for process in members.iter() { process.raise(signal, procs); } } diff --git a/src/kernel/task/process_list.rs b/src/kernel/task/process_list.rs index c676d22e..f3371f25 100644 --- a/src/kernel/task/process_list.rs +++ b/src/kernel/task/process_list.rs @@ -1,37 +1,41 @@ -use alloc::collections::btree_map::BTreeMap; use alloc::collections::vec_deque::VecDeque; -use alloc::sync::{Arc, Weak}; +use alloc::sync::Arc; use core::pin::pin; -use core::sync::atomic::Ordering; use eonix_runtime::scheduler::RUNTIME; use eonix_sync::{AsProof as _, AsProofMut as _, RwLock, Spin, WaitList}; +use intrusive_collections::RBTree; use super::loader::LoadInfo; +use super::process::AllProcs; +use super::process_group::AllGroups; +use super::session::AllSessions; +use super::thread::AllThreads; use super::{ - alloc_pid, Process, ProcessBuilder, ProcessGroup, Session, Thread, ThreadBuilder, WaitObject, + alloc_pid, Process, ProcessBuilder, ProcessGroup, Session, Thread, + ThreadBuilder, WaitObject, }; -use crate::rcu::rcu_sync; +use crate::rcu::call_rcu; pub struct ProcessList { /// The init process. init: Option>, /// All threads. - threads: BTreeMap>, + threads: RBTree, /// All processes. - processes: BTreeMap>, + procs: RBTree, /// All process groups. 
- pgroups: BTreeMap>, + pgroups: RBTree, /// All sessions. - sessions: BTreeMap>, + sessions: RBTree, } static GLOBAL_PROC_LIST: RwLock = RwLock::new(ProcessList { init: None, - threads: BTreeMap::new(), - processes: BTreeMap::new(), - pgroups: BTreeMap::new(), - sessions: BTreeMap::new(), + threads: RBTree::new(AllThreads::NEW), + procs: RBTree::new(AllProcs::NEW), + pgroups: RBTree::new(AllGroups::NEW), + sessions: RBTree::new(AllSessions::NEW), }); impl ProcessList { @@ -40,43 +44,64 @@ impl ProcessList { } pub fn add_session(&mut self, session: &Arc) { - self.sessions.insert(session.sid, Arc::downgrade(session)); + self.sessions.insert(session.clone()); } pub fn add_pgroup(&mut self, pgroup: &Arc) { - self.pgroups.insert(pgroup.pgid, Arc::downgrade(pgroup)); + self.pgroups.insert(pgroup.clone()); } pub fn add_process(&mut self, process: &Arc) { - self.processes.insert(process.pid, Arc::downgrade(process)); + self.procs.insert(process.clone()); } pub fn add_thread(&mut self, thread: &Arc) { - self.threads.insert(thread.tid, thread.clone()); + self.threads.insert(thread.clone()); } - pub async fn remove_process(&mut self, pid: u32) { + pub fn remove_process(&mut self, pid: u32) { // Thread group leader has the same tid as the pid. - if let Some(thread) = self.threads.remove(&pid) { - self.processes.remove(&pid); - - // SAFETY: We wait until all references are dropped below with `rcu_sync()`. - let session = unsafe { thread.process.session.swap(None) }.unwrap(); - let pgroup = unsafe { thread.process.pgroup.swap(None) }.unwrap(); - let _parent = unsafe { thread.process.parent.swap(None) }.unwrap(); - pgroup.remove_member(pid, self.prove_mut()); - rcu_sync().await; - - if Arc::strong_count(&pgroup) == 1 { - self.pgroups.remove(&pgroup.pgid); - } + let Some(_) = self.threads.find_mut(&pid).remove() else { + panic!("Thread {} not found", pid); + }; - if Arc::strong_count(&session) == 1 { - self.sessions.remove(&session.sid); - } - } else { + let Some(proc) = self.procs.find_mut(&pid).remove() else { panic!("Process {} not found", pid); - } + }; + + // SAFETY: `call_rcu` below. 
+        let session = unsafe { proc.session.swap(None) }.unwrap();
+        let pgroup = unsafe { proc.pgroup.swap(None) }.unwrap();
+        let parent = unsafe { proc.parent.swap(None) }.unwrap();
+
+        pgroup.remove_member(&proc, self);
+
+        call_rcu(move || {
+            drop(session);
+            drop(pgroup);
+            drop(parent);
+        });
+    }
+
+    pub fn remove_thread(&mut self, thread: &Arc) {
+        assert!(
+            self.threads.find_mut(&thread.tid).remove().is_some(),
+            "Double remove"
+        );
+    }
+
+    pub fn remove_session(&mut self, session: &Arc) {
+        assert!(
+            self.sessions.find_mut(&session.sid).remove().is_some(),
+            "Double remove"
+        );
+    }
+
+    pub fn remove_pgroup(&mut self, pgroup: &Arc) {
+        assert!(
+            self.pgroups.find_mut(&pgroup.pgid).remove().is_some(),
+            "Double remove"
+        );
     }
 
     fn set_init_process(&mut self, init: Arc) {
@@ -88,20 +113,20 @@ impl ProcessList {
         self.init.as_ref().unwrap()
     }
 
-    pub fn try_find_thread(&self, tid: u32) -> Option<&Arc> {
-        self.threads.get(&tid)
+    pub fn try_find_thread(&self, tid: u32) -> Option> {
+        self.threads.find(&tid).clone_pointer()
     }
 
     pub fn try_find_process(&self, pid: u32) -> Option> {
-        self.processes.get(&pid).and_then(Weak::upgrade)
+        self.procs.find(&pid).clone_pointer()
     }
 
     pub fn try_find_pgroup(&self, pgid: u32) -> Option> {
-        self.pgroups.get(&pgid).and_then(Weak::upgrade)
+        self.pgroups.find(&pgid).clone_pointer()
     }
 
     pub fn try_find_session(&self, sid: u32) -> Option> {
-        self.sessions.get(&sid).and_then(Weak::upgrade)
+        self.sessions.find(&sid).clone_pointer()
     }
 
     pub async fn sys_init(load_info: LoadInfo) {
@@ -118,9 +143,6 @@ impl ProcessList {
 
         process_list.set_init_process(process);
 
-        // TODO!!!: Remove this.
-        thread.files.open_console();
-
         RUNTIME.spawn(Reaper::daemon());
         RUNTIME.spawn(thread.run());
     }
@@ -152,18 +174,19 @@ impl Reaper {
         let process = &thread.process;
 
         if process.pid == 1 && thread.tid == process.pid {
-            panic!("init exited");
+            panic!("init exited: {}", alloc_pid());
         }
 
         let mut procs = ProcessList::get().write().await;
 
-        let inner = process.inner.access_mut(procs.prove_mut());
-
-        thread.dead.store(true, Ordering::SeqCst);
-
         if thread.tid != process.pid {
-            procs.threads.remove(&thread.tid);
-            inner.threads.remove(&thread.tid).unwrap();
+            let threads = process.threads.access_mut(procs.prove_mut());
+            assert!(
+                threads.find_mut(&thread.tid).remove().is_some(),
+                "Thread gone?"
+            );
+
+            procs.remove_thread(&thread);
         }
 
         // main thread exit
@@ -172,11 +195,11 @@
         thread.files.close_all().await;
 
+        let session = process.session(procs.prove()).clone();
         // If we are the session leader, we should drop the control terminal.
-        if process.session(procs.prove()).sid == process.pid {
-            if let Some(terminal) = process.session(procs.prove()).drop_control_terminal().await
-            {
-                terminal.drop_session().await;
+        if session.sid == process.pid {
+            if let Some(terminal) = session.control_terminal() {
+                terminal.drop_session(procs.prove());
             }
         }
 
         // Release the MMList as well as the page table.
         process.mm_list.release();
 
         // Make children orphans (adopted by init)
-        {
-            let init = procs.init_process();
-            inner.children.retain(|_, child| {
-                let child = child.upgrade().unwrap();
-                // SAFETY: `child.parent` must be ourself. So we don't need to free it.
-                unsafe { child.parent.swap(Some(init.clone())) };
-                init.add_child(&child, procs.prove_mut());
-
-                false
-            });
+        let init = procs.init_process();
+        let children = process.children.access_mut(procs.prove_mut());
+        for child in children.take() {
+            // XXX: May be buggy. Check here again.
+            // SAFETY: `child.parent` must be ourself.
+ // So we don't need to free it. + unsafe { child.parent.swap(Some(init.clone())) }; + init.add_child(&child, procs.prove_mut()); } let mut init_notify = procs.init_process().notify_batch(); diff --git a/src/kernel/task/session.rs b/src/kernel/task/session.rs index a7b57afd..899aa395 100644 --- a/src/kernel/task/session.rs +++ b/src/kernel/task/session.rs @@ -1,117 +1,170 @@ -use super::{Process, ProcessGroup, ProcessList, Thread}; -use crate::kernel::constants::EPERM; -use crate::{kernel::Terminal, prelude::*}; -use alloc::{ - collections::btree_map::BTreeMap, - sync::{Arc, Weak}, +use alloc::sync::{Arc, Weak}; + +use eonix_sync::{AsProof as _, AsProofMut, Locked, Proof, ProofMut}; +use intrusive_collections::{ + intrusive_adapter, KeyAdapter, RBTree, RBTreeAtomicLink, }; -use eonix_sync::{AsProof as _, AsProofMut as _, Locked, Proof, ProofMut, RwLock}; use posix_types::signal::Signal; -#[derive(Debug)] +use super::process_group::SessionGroups; +use super::{Process, ProcessGroup, ProcessList}; +use crate::kernel::constants::EPERM; +use crate::kernel::Terminal; +use crate::prelude::*; + struct SessionJobControl { - /// Foreground process group - foreground: Weak, + foreground: Option>, control_terminal: Option>, } -#[allow(dead_code)] -#[derive(Debug)] pub struct Session { pub sid: u32, pub leader: Weak, - job_control: RwLock, + job_control: Spin, + groups: Locked, ProcessList>, + all_sessions_link: RBTreeAtomicLink, +} - groups: Locked>, ProcessList>, +intrusive_adapter!(pub AllSessions = Arc: Session { + all_sessions_link: RBTreeAtomicLink +}); + +impl KeyAdapter<'_> for AllSessions { + type Key = u32; + + fn get_key( + &self, + value: &'_ ::Value, + ) -> Self::Key { + value.sid + } } impl Session { /// Create a session and add it to the global session list. - pub fn new(leader: &Arc, process_list: &mut ProcessList) -> Arc { + pub fn new(leader: &Arc, proclist: &mut ProcessList) -> Arc { let session = Arc::new(Self { sid: leader.pid, leader: Arc::downgrade(leader), - job_control: RwLock::new(SessionJobControl { - foreground: Weak::new(), + job_control: Spin::new(SessionJobControl { + foreground: None, control_terminal: None, }), - groups: Locked::new( - BTreeMap::new(), - // SAFETY: `procs` must be the global process list, which won't be moved. 
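The `intrusive_adapter!` + `KeyAdapter` pair seen here for `AllSessions` repeats the shape used for processes, pgroups and threads throughout this patch. Below is a minimal self-contained example of the pattern against the upstream intrusive-collections crate, using a toy node type (the kernel pins a fork of the crate with the "nightly" feature instead).

    use std::sync::Arc;

    use intrusive_collections::{
        intrusive_adapter, KeyAdapter, RBTree, RBTreeAtomicLink,
    };

    struct Node {
        id: u32,
        link: RBTreeAtomicLink,
    }

    intrusive_adapter!(ById = Arc<Node>: Node { link: RBTreeAtomicLink });

    impl<'a> KeyAdapter<'a> for ById {
        type Key = u32;

        fn get_key(&self, node: &'a Node) -> u32 {
            node.id
        }
    }

    fn main() {
        let mut tree = RBTree::new(ById::new());
        tree.insert(Arc::new(Node {
            id: 7,
            link: RBTreeAtomicLink::new(),
        }));

        // The node embeds its own link, so membership costs no extra
        // allocation, and `find` is an O(log n) search keyed by `id`.
        assert!(tree.find(&7).get().is_some());
    }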
- process_list, - ), + groups: Locked::new(RBTree::new(SessionGroups::NEW), proclist), + all_sessions_link: RBTreeAtomicLink::new(), }); - process_list.add_session(&session); + proclist.add_session(&session); session } - pub(super) fn add_member(&self, procs: &mut ProcessList, pgroup: &Arc) { - let groups = self.groups.access_mut(procs.prove_mut()); - let old = groups.insert(pgroup.pgid, Arc::downgrade(pgroup)); - assert!(old.is_none(), "Process group already exists"); + pub fn add_member( + &self, + pgroup: &Arc, + procs: ProofMut<'_, ProcessList>, + ) { + assert!(self.all_sessions_link.is_linked(), "Dead session"); + self.groups.access_mut(procs).insert(pgroup.clone()); } - pub(super) fn remove_member(&self, pgid: u32, procs: ProofMut<'_, ProcessList>) { - assert!(self.groups.access_mut(procs).remove(&pgid).is_some()); + pub fn remove_member( + self: &Arc, + pgroup: &Arc, + procs: &mut ProcessList, + ) { + let members = self.groups.access_mut(procs.prove_mut()); + assert!( + members.find_mut(&pgroup.pgid).remove().is_some(), + "Not a member" + ); + + if let Some(fg_pgroup) = self.foreground_pgroup() { + if fg_pgroup.pgid == pgroup.pgid { + let _ = self.set_foreground_pgroup(None); + } + } + + if !members.is_empty() { + return; + } + + // Recycle dead session. + procs.remove_session(self); } - pub async fn foreground(&self) -> Option> { - self.job_control.read().await.foreground.upgrade() + pub fn leader(&self) -> Option> { + self.leader.upgrade() + } + + pub fn foreground_pgroup(&self) -> Option> { + self.job_control.lock().foreground.clone() + } + + pub fn control_terminal(&self) -> Option> { + self.job_control.lock().control_terminal.clone() } /// Set the foreground process group identified by `pgid`. /// The process group must belong to the session. - pub async fn set_foreground_pgid( + pub fn set_foreground_pgroup( &self, - pgid: u32, - procs: Proof<'_, ProcessList>, + pgroup: Option<&Arc>, ) -> KResult<()> { - if let Some(group) = self.groups.access(procs).get(&pgid) { - self.job_control.write().await.foreground = group.clone(); - Ok(()) - } else { - // TODO: Check if the process group refers to an existing process group. - // That's not a problem though, the operation will fail anyway. - Err(EPERM) + if let Some(pgroup) = pgroup { + if pgroup.session.sid != self.sid { + return Err(EPERM); + } } + + self.job_control.lock().foreground = pgroup.cloned(); + Ok(()) } - /// Only session leaders can set the control terminal. - /// Make sure we've checked that before calling this function. - pub async fn set_control_terminal( + /// Set our controlling terminal to `terminal`. Only meant to be called by + /// the session leader. The pgroup that the session leader is in becomes the + /// new foreground pgroup. + /// + /// # Panics + /// Panics if we have a controlling terminal already + /// or the session leader is gone. 
+    pub fn _set_control_terminal(
         self: &Arc,
         terminal: &Arc,
-        forced: bool,
         procs: Proof<'_, ProcessList>,
-    ) -> KResult<()> {
-        let mut job_control = self.job_control.write().await;
-        if let Some(_) = job_control.control_terminal.as_ref() {
-            if let Some(session) = terminal.session().await.as_ref() {
-                if session.sid == self.sid {
-                    return Ok(());
-                }
-            }
-            return Err(EPERM);
-        }
-        terminal.set_session(self, forced).await?;
+    ) {
+        let mut job_control = self.job_control.lock();
+        let leader = self.leader().expect("Leader is gone?");
+
+        assert!(
+            job_control.control_terminal.is_none(),
+            "We have a controlling terminal already"
+        );
+
         job_control.control_terminal = Some(terminal.clone());
-        job_control.foreground = Arc::downgrade(&Thread::current().process.pgroup(procs));
-        Ok(())
+        job_control.foreground = Some(leader.pgroup(procs).clone());
     }
 
     /// Drop the control terminal reference inside the session.
-    /// DO NOT TOUCH THE TERMINAL'S SESSION FIELD.
-    pub async fn drop_control_terminal(&self) -> Option> {
-        let mut inner = self.job_control.write().await;
-        inner.foreground = Weak::new();
-        inner.control_terminal.take()
+    /// Send SIGHUP and then SIGCONT to our foreground pgroup.
+    pub fn _drop_control_terminal(&self, procs: Proof<'_, ProcessList>) {
+        let foreground = {
+            let mut inner = self.job_control.lock();
+            inner.control_terminal = None;
+            inner.foreground.take()
+        };
+
+        if let Some(foreground) = foreground {
+            foreground.raise(Signal::SIGHUP, procs);
+            foreground.raise(Signal::SIGCONT, procs);
+        }
     }
 
     pub async fn raise_foreground(&self, signal: Signal) {
-        if let Some(fg) = self.foreground().await {
-            let procs = ProcessList::get().read().await;
-            fg.raise(signal, procs.prove());
-        }
+        let Some(fg) = self.foreground_pgroup() else {
+            return;
+        };
+
+        let procs = ProcessList::get().read().await;
+        fg.raise(signal, procs.prove());
     }
 }
diff --git a/src/kernel/task/thread.rs b/src/kernel/task/thread.rs
index 7e005875..76c56dcc 100644
--- a/src/kernel/task/thread.rs
+++ b/src/kernel/task/thread.rs
@@ -14,6 +14,7 @@ use eonix_hal::traits::trap::{RawTrapContext, TrapReturn, TrapType};
 use eonix_hal::trap::TrapContext;
 use eonix_mm::address::{Addr as _, VAddr};
 use eonix_sync::AsProofMut as _;
+use intrusive_collections::{intrusive_adapter, KeyAdapter, RBTreeAtomicLink};
 use pointers::BorrowedArc;
 use posix_types::signal::Signal;
 use stalloc::UnsafeStalloc;
@@ -84,9 +85,44 @@ pub struct Thread {
     pub dead: AtomicBool,
     pub exit_status: Spin>,
 
+    /// Link in the global thread list.
+    all_threads_link: RBTreeAtomicLink,
+
+    /// Link in the process's thread list.
+ process_threads_link: RBTreeAtomicLink, + inner: Spin, } +intrusive_adapter!(pub AllThreads = Arc: Thread { + all_threads_link: RBTreeAtomicLink +}); +intrusive_adapter!(pub ProcessThreads = Arc: Thread { + process_threads_link: RBTreeAtomicLink +}); + +impl KeyAdapter<'_> for AllThreads { + type Key = u32; + + fn get_key( + &self, + value: &'_ ::Value, + ) -> Self::Key { + value.tid + } +} + +impl KeyAdapter<'_> for ProcessThreads { + type Key = u32; + + fn get_key( + &self, + value: &'_ ::Value, + ) -> Self::Key { + value.tid + } +} + impl ThreadBuilder { pub fn new() -> Self { Self { @@ -139,12 +175,18 @@ impl ThreadBuilder { self } - pub fn set_child_tid(mut self, set_child_tid: Option>) -> Self { + pub fn set_child_tid( + mut self, + set_child_tid: Option>, + ) -> Self { self.set_child_tid = set_child_tid; self } - pub fn clear_child_tid(mut self, clear_child_tid: Option>) -> Self { + pub fn clear_child_tid( + mut self, + clear_child_tid: Option>, + ) -> Self { self.clear_child_tid = clear_child_tid; self } @@ -171,7 +213,11 @@ impl ThreadBuilder { } /// Clone the thread from another thread. - pub fn clone_from(self, thread: &Thread, clone_args: &CloneArgs) -> KResult { + pub fn clone_from( + self, + thread: &Thread, + clone_args: &CloneArgs, + ) -> KResult { let inner = thread.inner.lock(); let mut trap_ctx = thread.trap_ctx.borrow().clone(); @@ -199,11 +245,12 @@ impl ThreadBuilder { FileArray::new_cloned(&thread.files) }; - let signal_list = if clone_args.flags.contains(CloneFlags::CLONE_SIGHAND) { - SignalList::new_shared(&thread.signal_list) - } else { - SignalList::new_cloned(&thread.signal_list) - }; + let signal_list = + if clone_args.flags.contains(CloneFlags::CLONE_SIGHAND) { + SignalList::new_shared(&thread.signal_list) + } else { + SignalList::new_cloned(&thread.signal_list) + }; Ok(self .files(files) @@ -241,6 +288,8 @@ impl ThreadBuilder { fpu_state: AtomicUniqueRefCell::new(fpu_state), dead: AtomicBool::new(false), exit_status: Spin::new(None), + all_threads_link: RBTreeAtomicLink::new(), + process_threads_link: RBTreeAtomicLink::new(), inner: Spin::new(ThreadInner { name, tls: self.tls, @@ -281,7 +330,10 @@ impl Thread { Ok(()) } - pub fn set_robust_list(&self, robust_list_address: Option>) { + pub fn set_robust_list( + &self, + robust_list_address: Option>, + ) { self.inner.lock().robust_list_address = robust_list_address; } @@ -371,7 +423,10 @@ impl Thread { while !self.is_dead() { if self.signal_list.has_pending_signal() { self.signal_list - .handle(&mut self.trap_ctx.borrow(), &mut self.fpu_state.borrow()) + .handle( + &mut self.trap_ctx.borrow(), + &mut self.fpu_state.borrow(), + ) .await; } @@ -399,7 +454,9 @@ impl Thread { } let mms = &self.process.mm_list; - if let Err(signal) = mms.handle_user_page_fault(addr, error_code).await { + if let Err(signal) = + mms.handle_user_page_fault(addr, error_code).await + { self.signal_list.raise(signal); } } @@ -409,8 +466,12 @@ impl Thread { TrapType::Fault(Fault::InvalidOp) => { self.signal_list.raise(Signal::SIGILL); } - TrapType::Fault(Fault::Unknown(_)) => unimplemented!("Unhandled fault"), - TrapType::Breakpoint => unimplemented!("Breakpoint in user space"), + TrapType::Fault(Fault::Unknown(_)) => { + unimplemented!("Unhandled fault") + } + TrapType::Breakpoint => { + unimplemented!("Breakpoint in user space") + } TrapType::Irq { callback } => callback(default_irq_handler), TrapType::Timer { callback } => { callback(timer_interrupt); @@ -424,11 +485,16 @@ impl Thread { return; } - if let Some(retval) = 
self.handle_syscall(thd_alloc, no, args).await { + if let Some(retval) = + self.handle_syscall(thd_alloc, no, args).await + { let mut trap_ctx = self.trap_ctx.borrow(); trap_ctx.set_user_return_value(retval); - #[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))] + #[cfg(any( + target_arch = "riscv64", + target_arch = "loongarch64" + ))] { let pc = trap_ctx.get_program_counter(); trap_ctx.set_program_counter(pc + 4); @@ -472,7 +538,10 @@ impl Thread { }) .await; - assert!(self.is_dead(), "`real_run` returned before the thread die?"); + assert!( + self.is_dead(), + "`real_run` returned before the thread die?" + ); ProcessList::send_to_reaper(self); } } @@ -499,7 +568,10 @@ pub async fn yield_now() { impl Future for Yield { type Output = (); - fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + fn poll( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll { if self.as_mut().yielded { Poll::Ready(()) } else { diff --git a/src/kernel/terminal.rs b/src/kernel/terminal.rs index 734655b7..ddc3cc1f 100644 --- a/src/kernel/terminal.rs +++ b/src/kernel/terminal.rs @@ -1,18 +1,19 @@ -use super::{ - task::{ProcessList, Session, Thread}, - user::{UserPointer, UserPointerMut}, -}; -use crate::kernel::constants::{EINTR, ENOTTY, EPERM}; -use crate::{io::Buffer, prelude::*, sync::CondVar}; -use alloc::{ - collections::vec_deque::VecDeque, - sync::{Arc, Weak}, -}; +use alloc::collections::vec_deque::VecDeque; +use alloc::sync::{Arc, Weak}; + use bitflags::bitflags; use eonix_log::ConsoleWrite; -use eonix_sync::{AsProof as _, Mutex}; +use eonix_sync::{Mutex, Proof}; use posix_types::signal::Signal; +use super::constants::ESRCH; +use super::task::{ProcessList, Session, Thread}; +use super::user::{UserPointer, UserPointerMut}; +use crate::io::Buffer; +use crate::kernel::constants::{EINTR, ENOTTY, EPERM}; +use crate::prelude::*; +use crate::sync::CondVar; + const BUFFER_SIZE: usize = 4096; const NCCS: usize = 19; @@ -351,12 +352,12 @@ pub trait TerminalDevice: Send + Sync { struct TerminalInner { termio: Termios, - session: Weak, buffer: VecDeque, } pub struct Terminal { inner: Mutex, + session: Spin>, device: Arc, cv: CondVar, } @@ -400,9 +401,9 @@ impl Terminal { Arc::new(Self { inner: Mutex::new(TerminalInner { termio: Termios::new_standard(), - session: Weak::new(), buffer: VecDeque::with_capacity(BUFFER_SIZE), }), + session: Spin::new(Weak::new()), cv: CondVar::new(), device, }) @@ -447,15 +448,21 @@ impl Terminal { } async fn signal(&self, inner: &mut TerminalInner, signal: Signal) { - if let Some(session) = inner.session.upgrade() { + if let Some(session) = self.session() { session.raise_foreground(signal).await; } + if !inner.termio.noflsh() { self.clear_read_buffer(inner); } } - async fn echo_and_signal(&self, inner: &mut TerminalInner, ch: u8, signal: Signal) { + async fn echo_and_signal( + &self, + inner: &mut TerminalInner, + ch: u8, + signal: Signal, + ) { self.echo_char(inner, ch); self.signal(inner, signal).await; } @@ -481,13 +488,19 @@ impl Terminal { match ch { 0xff => {} ch if ch == inner.termio.vintr() => { - return self.echo_and_signal(&mut inner, ch, Signal::SIGINT).await + return self + .echo_and_signal(&mut inner, ch, Signal::SIGINT) + .await } ch if ch == inner.termio.vquit() => { - return self.echo_and_signal(&mut inner, ch, Signal::SIGQUIT).await + return self + .echo_and_signal(&mut inner, ch, Signal::SIGQUIT) + .await } ch if ch == inner.termio.vsusp() => { - return self.echo_and_signal(&mut inner, ch, Signal::SIGTSTP).await + return self + 
.echo_and_signal(&mut inner, ch, Signal::SIGTSTP) + .await } _ => {} } @@ -517,8 +530,12 @@ impl Terminal { match ch { b'\r' if inner.termio.igncr() => {} - b'\r' if inner.termio.icrnl() => return self.do_commit_char(&mut inner, b'\n'), - b'\n' if inner.termio.inlcr() => return self.do_commit_char(&mut inner, b'\r'), + b'\r' if inner.termio.icrnl() => { + return self.do_commit_char(&mut inner, b'\n') + } + b'\n' if inner.termio.inlcr() => { + return self.do_commit_char(&mut inner, b'\r') + } _ => self.do_commit_char(&mut inner, ch), } } @@ -589,26 +606,30 @@ impl Terminal { pub async fn ioctl(&self, request: TerminalIORequest<'_>) -> KResult<()> { match request { TerminalIORequest::GetProcessGroup(pgid_pointer) => { - if let Some(session) = self.inner.lock().await.session.upgrade() { - if let Some(pgroup) = session.foreground().await { - return pgid_pointer.write(pgroup.pgid); - } - } + let Some(session) = self.session() else { + return Err(ENOTTY); + }; + + let Some(pgroup) = session.foreground_pgroup() else { + return Err(ENOTTY); + }; - Err(ENOTTY) + pgid_pointer.write(pgroup.pgid) } TerminalIORequest::SetProcessGroup(pgid) => { let pgid = pgid.read()?; let procs = ProcessList::get().read().await; - let inner = self.inner.lock().await; - let session = inner.session.upgrade(); + let Some(session) = self.session() else { + return Err(ENOTTY); + }; - if let Some(session) = session { - session.set_foreground_pgid(pgid, procs.prove()).await - } else { - Err(ENOTTY) - } + let Some(pgroup) = procs.try_find_pgroup(pgid) else { + return Err(ESRCH); + }; + + session.set_foreground_pgroup(Some(&pgroup))?; + Ok(()) } TerminalIORequest::GetWindowSize(ptr) => { // TODO: Get the actual window size @@ -630,9 +651,12 @@ impl Terminal { let mut inner = self.inner.lock().await; // TODO: We ignore unknown bits for now. - inner.termio.iflag = TermioIFlags::from_bits_truncate(user_termios.iflag as u16); - inner.termio.oflag = TermioOFlags::from_bits_truncate(user_termios.oflag as u16); - inner.termio.lflag = TermioLFlags::from_bits_truncate(user_termios.lflag as u16); + inner.termio.iflag = + TermioIFlags::from_bits_truncate(user_termios.iflag as u16); + inner.termio.oflag = + TermioOFlags::from_bits_truncate(user_termios.oflag as u16); + inner.termio.lflag = + TermioLFlags::from_bits_truncate(user_termios.lflag as u16); inner.termio.cflag = user_termios.cflag; inner.termio.line = user_termios.line; inner.termio.cc = user_termios.cc; @@ -642,30 +666,52 @@ impl Terminal { } } - /// Assign the `session` to this terminal. Drop the previous session if `forced` is true. - pub async fn set_session(&self, session: &Arc, forced: bool) -> KResult<()> { - let mut inner = self.inner.lock().await; - if let Some(session) = inner.session.upgrade() { + pub fn session(&self) -> Option> { + self.session.lock().upgrade() + } + + /// Drop the currently controlled session. The old session loses its controlling + /// terminal and its foreground process group will receive a SIGHUP and then a SIGCONT. + pub fn drop_session(&self, procs: Proof<'_, ProcessList>) { + let session = + core::mem::replace(&mut *self.session.lock(), Weak::new()); + let Some(old_session) = session.upgrade() else { + return; + }; + + old_session._drop_control_terminal(procs); + } + + /// Assign the `session` to this terminal. + /// Drop the previous session if `forced` is true.
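// An illustrative call sequence (a sketch; `tty`, `session` and `intruder`
// are hypothetical handles, and the proof comes from the process list lock
// as elsewhere in this file):
//
//     let procs = ProcessList::get().read().await;
//     tty.set_session(&session, false, procs.prove()).await?;  // EPERM if another session owns the tty
//     tty.set_session(&intruder, true, procs.prove()).await?;  // steals it; old foreground gets SIGHUP+SIGCONT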
+ pub async fn set_session( + self: &Arc, + session: &Arc, + forced: bool, + procs: Proof<'_, ProcessList>, + ) -> KResult<()> { + let mut cur_session = self.session.lock(); + + // XXX: Holding spinlock for too long? + if let Some(old_session) = cur_session.upgrade() { + if old_session.sid == session.sid { + return Ok(()); + } + if !forced { - Err(EPERM) - } else { - session.drop_control_terminal().await; - inner.session = Arc::downgrade(&session); - Ok(()) + return Err(EPERM); } - } else { - // Sessions should set their `control_terminal` field. - inner.session = Arc::downgrade(&session); - Ok(()) + + // TODO: Check whether the caller has the CAP_SYS_ADMIN capability. + + // We've stolen the terminal from the old session. + old_session._drop_control_terminal(procs); } - } - pub async fn drop_session(&self) { - self.inner.lock().await.session = Weak::new(); - } + *cur_session = Arc::downgrade(session); + session._set_control_terminal(self, procs); - pub async fn session(&self) -> Option> { - self.inner.lock().await.session.upgrade() + Ok(()) } } diff --git a/src/kernel/vfs/file/terminal_file.rs b/src/kernel/vfs/file/terminal_file.rs index f318c5b2..04a022b5 100644 --- a/src/kernel/vfs/file/terminal_file.rs +++ b/src/kernel/vfs/file/terminal_file.rs @@ -1,24 +1,46 @@ -use super::{File, FileType, PollEvent}; -use crate::{ - io::{Buffer, Stream, StreamRead}, - kernel::{ - constants::{EINVAL, TCGETS, TCSETS, TIOCGPGRP, TIOCGWINSZ, TIOCSPGRP}, - terminal::TerminalIORequest, - user::{UserPointer, UserPointerMut}, - Terminal, - }, - prelude::KResult, -}; use alloc::sync::Arc; + +use eonix_sync::AsProof; use posix_types::open::OpenFlags; +use super::{File, FileType, PollEvent}; +use crate::io::{Buffer, Stream, StreamRead}; +use crate::kernel::constants::{ + EINVAL, TCGETS, TCSETS, TIOCGPGRP, TIOCGWINSZ, TIOCSPGRP, +}; +use crate::kernel::task::{ProcessList, Thread}; +use crate::kernel::terminal::TerminalIORequest; +use crate::kernel::user::{UserPointer, UserPointerMut}; +use crate::kernel::Terminal; +use crate::prelude::KResult; + pub struct TerminalFile { terminal: Arc, } impl TerminalFile { - pub fn new(tty: Arc, flags: OpenFlags) -> File { - File::new(flags, FileType::Terminal(TerminalFile { terminal: tty })) + pub async fn open( + thread: &Thread, + terminal: &Arc, + flags: OpenFlags, + ) -> File { + let set_control_tty = !flags.contains(OpenFlags::O_NOCTTY); + + let procs = ProcessList::get().read().await; + let session = thread.process.session(procs.prove()); + + // We only set the control terminal if the process is the session leader. + if set_control_tty && session.sid == thread.process.pid { + // Silently fail if we can't set the control terminal. 
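// Per POSIX, a session leader that has no controlling terminal acquires
// the tty it opens as its controlling terminal unless O_NOCTTY was given;
// non-leaders and sessions that already own a terminal never do. That is
// why an error from `set_session` is deliberately swallowed just below.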
+ let _ = terminal.set_session(&session, false, procs.prove()).await; + } + + File::new( + flags, + FileType::Terminal(TerminalFile { + terminal: terminal.clone(), + }), + ) } pub async fn read(&self, buffer: &mut dyn Buffer) -> KResult { @@ -43,11 +65,21 @@ impl TerminalFile { pub async fn ioctl(&self, request: usize, arg3: usize) -> KResult<()> { self.terminal .ioctl(match request as u32 { - TCGETS => TerminalIORequest::GetTermios(UserPointerMut::with_addr(arg3)?), - TCSETS => TerminalIORequest::SetTermios(UserPointer::with_addr(arg3)?), - TIOCGPGRP => TerminalIORequest::GetProcessGroup(UserPointerMut::with_addr(arg3)?), - TIOCSPGRP => TerminalIORequest::SetProcessGroup(UserPointer::with_addr(arg3)?), - TIOCGWINSZ => TerminalIORequest::GetWindowSize(UserPointerMut::with_addr(arg3)?), + TCGETS => TerminalIORequest::GetTermios( + UserPointerMut::with_addr(arg3)?, + ), + TCSETS => { + TerminalIORequest::SetTermios(UserPointer::with_addr(arg3)?) + } + TIOCGPGRP => TerminalIORequest::GetProcessGroup( + UserPointerMut::with_addr(arg3)?, + ), + TIOCSPGRP => TerminalIORequest::SetProcessGroup( + UserPointer::with_addr(arg3)?, + ), + TIOCGWINSZ => TerminalIORequest::GetWindowSize( + UserPointerMut::with_addr(arg3)?, + ), _ => return Err(EINVAL), }) .await diff --git a/src/kernel/vfs/filearray.rs b/src/kernel/vfs/filearray.rs index 609d969c..c0b6a49e 100644 --- a/src/kernel/vfs/filearray.rs +++ b/src/kernel/vfs/filearray.rs @@ -1,19 +1,22 @@ use alloc::sync::Arc; use intrusive_collections::rbtree::Entry; -use intrusive_collections::{intrusive_adapter, Bound, KeyAdapter, RBTree, RBTreeAtomicLink}; +use intrusive_collections::{ + intrusive_adapter, Bound, KeyAdapter, RBTree, RBTreeAtomicLink, +}; use itertools::FoldWhile::{Continue, Done}; use itertools::Itertools; use posix_types::open::{FDFlags, OpenFlags}; use super::file::{File, InodeFile, Pipe}; use super::types::{Format, Permission}; -use super::{Spin, TerminalFile}; -use crate::kernel::console::get_console; +use super::Spin; use crate::kernel::constants::{ - EBADF, EISDIR, ENOTDIR, ENXIO, F_DUPFD, F_DUPFD_CLOEXEC, F_GETFD, F_GETFL, F_SETFD, F_SETFL, + EBADF, EISDIR, ENOTDIR, ENXIO, F_DUPFD, F_DUPFD_CLOEXEC, F_GETFD, F_GETFL, + F_SETFD, F_SETFL, }; use crate::kernel::syscall::{FromSyscallArg, SyscallRetVal}; +use crate::kernel::task::Thread; use crate::kernel::vfs::dentry::Dentry; use crate::kernel::CharDevice; use crate::prelude::*; @@ -80,7 +83,11 @@ impl FDAllocator { self.min_avail = FD(0); } - fn find_available(&mut self, from: FD, files: &RBTree) -> FD { + fn find_available( + &mut self, + from: FD, + files: &RBTree, + ) -> FD { files .range(Bound::Included(&from), Bound::Unbounded) .fold_while(from, |current, OpenFile { fd, .. }| { @@ -143,7 +150,8 @@ impl FileArray { let other_inner = other.inner.lock(); for file in other_inner.files.iter() { - let new_file = OpenFile::new(file.fd, file.flags, file.file.dup()); + let new_file = + OpenFile::new(file.fd, file.flags, file.file.dup()); new_files.insert(new_file); } (new_files, other_inner.fd_alloc.clone()) @@ -223,7 +231,12 @@ impl FileArray { /// Duplicates the file to a new file descriptor, returning the old file /// description to be dropped. 
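// Intended pattern, sketched with hypothetical descriptors: swap the file
// in while holding the spinlock, then close the displaced description with
// no lock held (this mirrors `dup_to` below):
//
//     if let Some(old) = files.dup_to_no_close(FD(3), FD(4), FDFlags::empty())? {
//         old.close().await; // must not .await while `inner` is locked
//     }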
- fn dup_to_no_close(&self, old_fd: FD, new_fd: FD, fd_flags: FDFlags) -> KResult> { + fn dup_to_no_close( + &self, + old_fd: FD, + new_fd: FD, + fd_flags: FDFlags, + ) -> KResult> { let mut inner = self.inner.lock(); let (files, fd_alloc) = inner.split_borrow(); @@ -240,7 +253,8 @@ impl FileArray { Entry::Occupied(mut entry) => { let mut file = entry.remove().unwrap(); file.flags = fd_flags; - let old_file = core::mem::replace(&mut file.file, new_file_data); + let old_file = + core::mem::replace(&mut file.file, new_file_data); entry.insert(file); @@ -249,8 +263,15 @@ impl FileArray { } } - pub async fn dup_to(&self, old_fd: FD, new_fd: FD, flags: OpenFlags) -> KResult { - if let Some(old_file) = self.dup_to_no_close(old_fd, new_fd, flags.as_fd_flags())? { + pub async fn dup_to( + &self, + old_fd: FD, + new_fd: FD, + flags: OpenFlags, + ) -> KResult { + if let Some(old_file) = + self.dup_to_no_close(old_fd, new_fd, flags.as_fd_flags())? + { old_file.close().await; } @@ -277,6 +298,7 @@ impl FileArray { pub async fn open( &self, + thread: &Thread, dentry: &Arc, flags: OpenFlags, perm: Permission, @@ -300,7 +322,7 @@ impl FileArray { let file = if inode.format == Format::CHR { let device = CharDevice::get(inode.devid()?).ok_or(ENXIO)?; - device.open(flags)? + device.open(thread, flags).await? } else { InodeFile::new(dentry.clone(), flags) }; @@ -323,7 +345,8 @@ impl FileArray { F_DUPFD | F_DUPFD_CLOEXEC => { let ofile = cursor.get().ok_or(EBADF)?; - let cloexec = cmd == F_DUPFD_CLOEXEC || ofile.flags.close_on_exec(); + let cloexec = + cmd == F_DUPFD_CLOEXEC || ofile.flags.close_on_exec(); let flags = cloexec .then_some(FDFlags::FD_CLOEXEC) .unwrap_or(FDFlags::empty()); @@ -342,7 +365,9 @@ impl FileArray { cursor.insert(ofile); 0 } - F_GETFL => cursor.get().ok_or(EBADF)?.file.get_flags().bits() as usize, + F_GETFL => { + cursor.get().ok_or(EBADF)?.file.get_flags().bits() as usize + } F_SETFL => { cursor .get() @@ -357,35 +382,6 @@ impl FileArray { Ok(ret) } - - /// Only used for init process. - pub fn open_console(&self) { - let mut inner = self.inner.lock(); - let (files, fd_alloc) = inner.split_borrow(); - - let (stdin, stdout, stderr) = ( - fd_alloc.next_fd(files), - fd_alloc.next_fd(files), - fd_alloc.next_fd(files), - ); - let console_terminal = get_console().expect("No console terminal"); - - inner.do_insert( - stdin, - FDFlags::FD_CLOEXEC, - TerminalFile::new(console_terminal.clone(), OpenFlags::empty()), - ); - inner.do_insert( - stdout, - FDFlags::FD_CLOEXEC, - TerminalFile::new(console_terminal.clone(), OpenFlags::empty()), - ); - inner.do_insert( - stderr, - FDFlags::FD_CLOEXEC, - TerminalFile::new(console_terminal.clone(), OpenFlags::empty()), - ); - } } impl FileArrayInner { @@ -397,7 +393,9 @@ impl FileArrayInner { fn do_insert(&mut self, fd: FD, flags: FDFlags, file: File) { match self.files.entry(&fd) { Entry::Occupied(_) => { - panic!("File descriptor {fd:?} already exists in the file array."); + panic!( + "File descriptor {fd:?} already exists in the file array." 
+ ); } Entry::Vacant(insert_cursor) => { insert_cursor.insert(OpenFile::new(fd, flags, file)); @@ -405,7 +403,9 @@ } } - fn split_borrow(&mut self) -> (&mut RBTree, &mut FDAllocator) { + fn split_borrow( + &mut self, + ) -> (&mut RBTree, &mut FDAllocator) { let Self { files, fd_alloc } = self; (files, fd_alloc) } diff --git a/user-programs/init_script_riscv64.sh b/user-programs/init_script_riscv64.sh index b5ce95d7..f67e2a27 100644 --- a/user-programs/init_script_riscv64.sh +++ b/user-programs/init_script_riscv64.sh @@ -56,6 +56,9 @@ busybox mknod -m 666 /dev/vdb b 8 16 busybox mknod -m 666 /dev/ttyS0 c 4 64 busybox mknod -m 666 /dev/ttyS1 c 4 65 +exec < "$TERMINAL" +exec > "$TERMINAL" 2>&1 + info "deploying busybox..." busybox mkdir -p /bin /lib @@ -106,8 +109,7 @@ int main() { } EOF -# shellcheck disable=SC2094 -exec sh -l < "$TERMINAL" > "$TERMINAL" 2> "$TERMINAL" +exec sh -l # We don't have a working init yet, so we use busybox sh directly for now. # exec /mnt/init /bin/sh -c 'exec sh -l < /dev/ttyS0 > /dev/ttyS0 2> /dev/ttyS0' From 2392ac19d2138a9abf307252a4adb78b18afa405 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sat, 24 Jan 2026 01:31:13 +0800 Subject: [PATCH 17/25] style: reformat the files related to next patches Reformat the files with the new format style to make the real changes clearer. Signed-off-by: greatbridf --- .../eonix_hal/src/arch/riscv64/bootstrap.rs | 50 +++++++++++++----- crates/eonix_hal/src/arch/riscv64/mm.rs | 52 ++++++++++++------- src/kernel_init.rs | 3 +- 3 files changed, 70 insertions(+), 35 deletions(-) diff --git a/crates/eonix_hal/src/arch/riscv64/bootstrap.rs b/crates/eonix_hal/src/arch/riscv64/bootstrap.rs index b2305f99..4e5afcfb 100644 --- a/crates/eonix_hal/src/arch/riscv64/bootstrap.rs +++ b/crates/eonix_hal/src/arch/riscv64/bootstrap.rs @@ -7,8 +7,12 @@ use core::sync::atomic::{AtomicBool, AtomicPtr, AtomicUsize, Ordering}; use eonix_hal_traits::mm::Memory; use eonix_mm::address::{Addr as _, PAddr, PRange, PhysAccess, VAddr, VRange}; -use eonix_mm::page_table::{PageAttribute, PageTable, PagingMode, TableAttribute, PTE as _}; +use eonix_mm::page_table::{ + PageAttribute, PageTable, PagingMode, TableAttribute, PTE as _, +}; -use eonix_mm::paging::{Folio, FrameAlloc, PageAccess, PageBlock, PAGE_SIZE, PFN}; +use eonix_mm::paging::{ + Folio, FrameAlloc, PageAccess, PageBlock, PAGE_SIZE, PFN, +}; use eonix_percpu::PercpuArea; use fdt::Fdt; use riscv::asm::sfence_vma_all; @@ -25,11 +29,13 @@ use super::time::set_next_timer; use crate::arch::cpu::CPU; use crate::arch::fdt::{init_dtb_and_fdt, FdtExt, FDT}; use crate::arch::mm::{ - ArchPagingMode, ArchPhysAccess, FreeRam, PageAccessImpl, PageAttribute64, RawPageTableSv48, - GLOBAL_PAGE_TABLE, + ArchPagingMode, ArchPhysAccess, FreeRam, PageAccessImpl, PageAttribute64, + RawPageTableSv48, GLOBAL_PAGE_TABLE, }; use crate::bootstrap::BootStrapData; -use crate::mm::{ArchMemory, BasicPageAlloc, BasicPageAllocRef, ScopedAllocator}; +use crate::mm::{ + ArchMemory, BasicPageAlloc, BasicPageAllocRef, ScopedAllocator, +}; #[unsafe(link_section = ".bootstrap.stack")] static BOOT_STACK: [u8; 4096 * 16] = [0; 4096 * 16]; @@ -64,7 +70,8 @@ static PT1: BootPageTable = { BootPageTable(arr) }; -static BSP_PAGE_ALLOC: AtomicPtr> = AtomicPtr::new(core::ptr::null_mut()); +static BSP_PAGE_ALLOC: AtomicPtr> = + AtomicPtr::new(core::ptr::null_mut()); static AP_COUNT: AtomicUsize = AtomicUsize::new(0); static AP_STACK: AtomicUsize = AtomicUsize::new(0); @@ -130,11 +137,14 @@ pub unsafe extern "C" fn
riscv64_start(hart_id: usize, dtb_addr: PAddr) -> ! { } let start = unsafe { - ((&BOOT_STACK_START) as *const &'static [u8; 4096 * 16]).read_volatile() as *const _ - as usize + ((&BOOT_STACK_START) as *const &'static [u8; 4096 * 16]).read_volatile() + as *const _ as usize }; let bootstrap_data = BootStrapData { - early_stack: PRange::new(PAddr::from(start), PAddr::from(start + 4096 * 16)), + early_stack: PRange::new( + PAddr::from(start), + PAddr::from(start + 4096 * 16), + ), allocator: Some(real_allocator), }; @@ -179,7 +189,11 @@ fn setup_kernel_page_table(alloc: BasicPageAllocRef) { sfence_vma_all(); unsafe { - core::ptr::write_bytes(KERNEL_BSS_START.addr() as *mut (), 0, BSS_LENGTH as usize); + core::ptr::write_bytes( + KERNEL_BSS_START.addr() as *mut (), + 0, + BSS_LENGTH as usize, + ); } unsafe { @@ -255,7 +269,8 @@ fn bootstrap_smp(alloc: impl Allocator, page_alloc: &RefCell) { stack_range }; - let old = BSP_PAGE_ALLOC.swap((&raw const *page_alloc) as *mut _, Ordering::Release); + let old = BSP_PAGE_ALLOC + .swap((&raw const *page_alloc) as *mut _, Ordering::Release); assert!(old.is_null()); while AP_STACK @@ -324,7 +339,12 @@ unsafe extern "C" fn _ap_start(hart_id: usize) { fn get_ap_stack() -> usize { while AP_SEM - .compare_exchange_weak(false, true, Ordering::Acquire, Ordering::Relaxed) + .compare_exchange_weak( + false, + true, + Ordering::Acquire, + Ordering::Relaxed, + ) .is_err() { core::hint::spin_loop(); @@ -344,12 +364,14 @@ fn get_ap_stack() -> usize { } fn ap_entry(hart_id: usize, stack_bottom: PAddr) -> ! { - let stack_range = PRange::new(stack_bottom - (1 << 3) * PAGE_SIZE, stack_bottom); + let stack_range = + PRange::new(stack_bottom - (1 << 3) * PAGE_SIZE, stack_bottom); { // SAFETY: Acquire all the work done by the BSP and other APs. 
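// This handoff is a tiny lock built from one AtomicPtr: the previous owner
// publishes the allocator with Release, the next owner swaps the slot back
// to null with Acquire semantics and spins until it wins. A standalone
// sketch of the same idea (hypothetical `SLOT`):
//
//     static SLOT: AtomicPtr<u8> = AtomicPtr::new(core::ptr::null_mut());
//     // publish:  SLOT.store(ptr, Ordering::Release);
//     // take:     loop {
//     //     let p = SLOT.swap(core::ptr::null_mut(), Ordering::AcqRel);
//     //     if !p.is_null() { break p; }
//     //     core::hint::spin_loop();
//     // }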
let alloc = loop { - let alloc = BSP_PAGE_ALLOC.swap(core::ptr::null_mut(), Ordering::AcqRel); + let alloc = + BSP_PAGE_ALLOC.swap(core::ptr::null_mut(), Ordering::AcqRel); if !alloc.is_null() { break alloc; diff --git a/crates/eonix_hal/src/arch/riscv64/mm.rs b/crates/eonix_hal/src/arch/riscv64/mm.rs index f67646cf..45d44c6f 100644 --- a/crates/eonix_hal/src/arch/riscv64/mm.rs +++ b/crates/eonix_hal/src/arch/riscv64/mm.rs @@ -4,8 +4,8 @@ use core::ptr::NonNull; use eonix_hal_traits::mm::Memory; use eonix_mm::address::{Addr as _, AddrOps, PAddr, PRange, PhysAccess, VAddr}; use eonix_mm::page_table::{ - PageAttribute, PageTable, PageTableLevel, PagingMode, RawAttribute, RawPageTable, - TableAttribute, PTE, + PageAttribute, PageTable, PageTableLevel, PagingMode, RawAttribute, + RawPageTable, TableAttribute, PTE, }; use eonix_mm::paging::{BasicFolio, Folio, PageAccess, PageBlock, PFN}; use eonix_sync_base::LazyLock; @@ -115,7 +115,9 @@ impl RawAttribute for PageAttribute64 { table_attr |= TableAttribute::PRESENT; } - if table_attr.contains(TableAttribute::PRESENT) && self.0 & (PA_R | PA_W | PA_X) != 0 { + if table_attr.contains(TableAttribute::PRESENT) + && self.0 & (PA_R | PA_W | PA_X) != 0 + { return None; } @@ -139,7 +141,9 @@ impl RawAttribute for PageAttribute64 { page_attr |= PageAttribute::PRESENT; } - if page_attr.contains(PageAttribute::PRESENT) && (self.0 & (PA_R | PA_W | PA_X) == 0) { + if page_attr.contains(PageAttribute::PRESENT) + && (self.0 & (PA_R | PA_W | PA_X) == 0) + { return None; } @@ -278,18 +282,22 @@ impl Memory for ArchMemory { let kernel_end = PAddr::from(__kernel_end as usize - KIMAGE_OFFSET); let paddr_after_kimage_aligned = kernel_end.ceil_to(0x200000); - core::iter::once(PRange::new(kernel_end, paddr_after_kimage_aligned)).chain( - Self::present_ram() - .filter(move |range| range.end() > paddr_after_kimage_aligned) - .map(move |range| { - if range.start() < paddr_after_kimage_aligned { - let (_, right) = range.split_at(paddr_after_kimage_aligned); - right - } else { - range - } - }), - ) + core::iter::once(PRange::new(kernel_end, paddr_after_kimage_aligned)) + .chain( + Self::present_ram() + .filter(move |range| { + range.end() > paddr_after_kimage_aligned + }) + .map(move |range| { + if range.start() < paddr_after_kimage_aligned { + let (_, right) = + range.split_at(paddr_after_kimage_aligned); + right + } else { + range + } + }), + ) } } @@ -314,17 +322,21 @@ where let kernel_end = PAddr::from(__kernel_end as usize - KIMAGE_OFFSET); let paddr_after_kimage_aligned = kernel_end.ceil_to(0x200000); - core::iter::once(PRange::new(kernel_end, paddr_after_kimage_aligned)).chain( - self.filter(move |range| range.end() > paddr_after_kimage_aligned) + core::iter::once(PRange::new(kernel_end, paddr_after_kimage_aligned)) + .chain( + self.filter(move |range| { + range.end() > paddr_after_kimage_aligned + }) .map(move |range| { if range.start() < paddr_after_kimage_aligned { - let (_, right) = range.split_at(paddr_after_kimage_aligned); + let (_, right) = + range.split_at(paddr_after_kimage_aligned); right } else { range } }), - ) + ) } } diff --git a/src/kernel_init.rs b/src/kernel_init.rs index 65af41e4..64c220b2 100644 --- a/src/kernel_init.rs +++ b/src/kernel_init.rs @@ -19,7 +19,8 @@ fn setup_kernel_page_array(alloc: BasicPageAllocRef, count_pages: usize) { // Map kernel page array. 
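// `VRange::from(start).grow(len)` denotes the half-open range
// [start, start + len). For example, hypothetically with count_pages == 4:
//
//     VRange::from(V_KERNEL_PAGE_ARRAY_START).grow(PAGE_SIZE * 4)
//     // covers 0xffffff8040000000..0xffffff8040004000, i.e. four PTEs below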
const V_KERNEL_PAGE_ARRAY_START: VAddr = VAddr::from(0xffffff8040000000); - let range = VRange::from(V_KERNEL_PAGE_ARRAY_START).grow(PAGE_SIZE * count_pages); + let range = + VRange::from(V_KERNEL_PAGE_ARRAY_START).grow(PAGE_SIZE * count_pages); for pte in global_page_table.iter_kernel(range) { let attr = PageAttribute::PRESENT | PageAttribute::WRITE From 6a007864070aa997379256117584da67ec7d49d8 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sat, 24 Jan 2026 01:32:55 +0800 Subject: [PATCH 18/25] riscv64, hal: simplify ArchMemory::free_ram implementation We already have `FDT.present_ram().free_ram()`. Remove the impl in `ArchMemory` to avoid confusion. Signed-off-by: greatbridf --- crates/eonix_hal/src/arch/riscv64/mm.rs | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/crates/eonix_hal/src/arch/riscv64/mm.rs b/crates/eonix_hal/src/arch/riscv64/mm.rs index 45d44c6f..3b6df07c 100644 --- a/crates/eonix_hal/src/arch/riscv64/mm.rs +++ b/crates/eonix_hal/src/arch/riscv64/mm.rs @@ -274,30 +274,7 @@ impl Memory for ArchMemory { } fn free_ram() -> impl Iterator { - unsafe extern "C" { - fn __kernel_start(); - fn __kernel_end(); - } - - let kernel_end = PAddr::from(__kernel_end as usize - KIMAGE_OFFSET); - let paddr_after_kimage_aligned = kernel_end.ceil_to(0x200000); - - core::iter::once(PRange::new(kernel_end, paddr_after_kimage_aligned)) - .chain( - Self::present_ram() - .filter(move |range| { - range.end() > paddr_after_kimage_aligned - }) - .map(move |range| { - if range.start() < paddr_after_kimage_aligned { - let (_, right) = - range.split_at(paddr_after_kimage_aligned); - right - } else { - range - } - }), - ) + FDT.present_ram().free_ram() } } From 53ae1851d9da456532f55457b34be5cbcf65ad11 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sat, 24 Jan 2026 01:51:32 +0800 Subject: [PATCH 19/25] style: add helper macros to retrieve symbol constants - Add `extern_symbol_value` to retrieve far relative symbol values. - Get `BSS_LENGTH` and `__kernel_end` using `extern_symbol_addr`. - Get `_ap_start` using `extern_symbol_value`. Signed-off-by: greatbridf --- .../eonix_hal/src/arch/riscv64/bootstrap.rs | 28 +++++++------------ crates/eonix_hal/src/arch/riscv64/mm.rs | 8 ++---- crates/eonix_hal/src/lib.rs | 14 ++++++++++ 3 files changed, 27 insertions(+), 23 deletions(-) diff --git a/crates/eonix_hal/src/arch/riscv64/bootstrap.rs b/crates/eonix_hal/src/arch/riscv64/bootstrap.rs index 4e5afcfb..d317e448 100644 --- a/crates/eonix_hal/src/arch/riscv64/bootstrap.rs +++ b/crates/eonix_hal/src/arch/riscv64/bootstrap.rs @@ -36,6 +36,7 @@ use crate::bootstrap::BootStrapData; use crate::mm::{ ArchMemory, BasicPageAlloc, BasicPageAllocRef, ScopedAllocator, }; +use crate::{extern_symbol_addr, extern_symbol_value}; #[unsafe(link_section = ".bootstrap.stack")] static BOOT_STACK: [u8; 4096 * 16] = [0; 4096 * 16]; @@ -156,10 +157,6 @@ pub unsafe extern "C" fn riscv64_start(hart_id: usize, dtb_addr: PAddr) -> ! 
{ } } -unsafe extern "C" { - fn BSS_LENGTH(); -} - /// TODO: /// 对kernel image添加更细的控制,或者不加也行 fn setup_kernel_page_table(alloc: BasicPageAllocRef) { @@ -171,17 +168,17 @@ fn setup_kernel_page_table(alloc: BasicPageAllocRef) { let attr = PageAttribute::WRITE | PageAttribute::READ - | PageAttribute::EXECUTE | PageAttribute::GLOBAL | PageAttribute::PRESENT; const KERNEL_BSS_START: VAddr = VAddr::from(0xffffffff40000000); + let bss_length = extern_symbol_addr!(BSS_LENGTH); + // Map kernel BSS - let bss_range = VRange::from(KERNEL_BSS_START).grow(BSS_LENGTH as usize); + let bss_range = VRange::from(KERNEL_BSS_START).grow(bss_length); for pte in global_page_table.iter_kernel(bss_range) { let page = alloc.alloc().unwrap(); - let attr = attr.difference(PageAttribute::EXECUTE); pte.set(page.into_raw(), attr.into()); } @@ -192,7 +189,7 @@ fn setup_kernel_page_table(alloc: BasicPageAllocRef) { core::ptr::write_bytes( KERNEL_BSS_START.addr() as *mut (), 0, - BSS_LENGTH as usize, + bss_length, ); } @@ -247,15 +244,6 @@ fn setup_cpu(alloc: impl FrameAlloc, hart_id: usize) { percpu_area.register(cpu.cpuid()); } -fn get_ap_start_addr() -> usize { - unsafe extern "C" { - fn _ap_start(); - } - static AP_START_VALUE: &'static unsafe extern "C" fn() = - &(_ap_start as unsafe extern "C" fn()); - unsafe { (AP_START_VALUE as *const _ as *const usize).read_volatile() } -} - fn bootstrap_smp(alloc: impl Allocator, page_alloc: &RefCell) { let local_hart_id = CPU::local().cpuid(); let mut ap_count = 0; @@ -286,7 +274,11 @@ fn bootstrap_smp(alloc: impl Allocator, page_alloc: &RefCell) { } unsafe { - hart_start(hart_id, PhysicalAddress::new(get_ap_start_addr()), 0); + hart_start( + hart_id, + PhysicalAddress::new(extern_symbol_value!(_ap_start)), + 0, + ); } while AP_COUNT.load(Ordering::Acquire) == ap_count { diff --git a/crates/eonix_hal/src/arch/riscv64/mm.rs b/crates/eonix_hal/src/arch/riscv64/mm.rs index 3b6df07c..6362cdca 100644 --- a/crates/eonix_hal/src/arch/riscv64/mm.rs +++ b/crates/eonix_hal/src/arch/riscv64/mm.rs @@ -16,6 +16,7 @@ use riscv::register::satp; use super::config::mm::{PHYS_MAP_VIRT, ROOT_PAGE_TABLE_PFN}; use super::fdt::{FdtExt, FDT}; use crate::arch::riscv64::config::mm::KIMAGE_OFFSET; +use crate::extern_symbol_addr; use crate::mm::BasicPageAlloc; const PAGE_TABLE_BASE: PFN = PFN::from_val(ROOT_PAGE_TABLE_PFN); @@ -291,12 +292,9 @@ where T: PresentRam, { fn free_ram(self) -> impl Iterator { - unsafe extern "C" { - fn __kernel_start(); - fn __kernel_end(); - } + let kernel_end = extern_symbol_addr!(__kernel_end) - KIMAGE_OFFSET; + let kernel_end = PAddr::from(kernel_end).ceil(); - let kernel_end = PAddr::from(__kernel_end as usize - KIMAGE_OFFSET); let paddr_after_kimage_aligned = kernel_end.ceil_to(0x200000); core::iter::once(PRange::new(kernel_end, paddr_after_kimage_aligned)) diff --git a/crates/eonix_hal/src/lib.rs b/crates/eonix_hal/src/lib.rs index d3bf7825..3f49a326 100644 --- a/crates/eonix_hal/src/lib.rs +++ b/crates/eonix_hal/src/lib.rs @@ -69,3 +69,17 @@ macro_rules! extern_symbol_addr { $crate::symbol_addr!($sym, $type) }}; } + +#[macro_export] +macro_rules! 
extern_symbol_value { + ($sym:ident) => {{ + unsafe extern "C" { + fn $sym(); + } + + static SYMBOL_ADDR: &'static unsafe extern "C" fn() = + &($sym as unsafe extern "C" fn()); + + unsafe { (SYMBOL_ADDR as *const _ as *const usize).read_volatile() } + }}; +} From a117be1530a10b0e3a56b597f088c66f5abc3060 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sat, 24 Jan 2026 01:53:22 +0800 Subject: [PATCH 20/25] style: reformat files Signed-off-by: greatbridf --- crates/eonix_hal/src/lib.rs | 4 +- src/kernel/task/signal/signal_action.rs | 52 +++++++++++++++---------- 2 files changed, 34 insertions(+), 22 deletions(-) diff --git a/crates/eonix_hal/src/lib.rs b/crates/eonix_hal/src/lib.rs index 3f49a326..d8696994 100644 --- a/crates/eonix_hal/src/lib.rs +++ b/crates/eonix_hal/src/lib.rs @@ -11,7 +11,9 @@ pub mod mm; pub mod trap; pub mod fence { - pub use crate::arch::fence::{memory_barrier, read_memory_barrier, write_memory_barrier}; + pub use crate::arch::fence::{ + memory_barrier, read_memory_barrier, write_memory_barrier, + }; } pub mod fpu { diff --git a/src/kernel/task/signal/signal_action.rs b/src/kernel/task/signal/signal_action.rs index 708f9802..cbf81935 100644 --- a/src/kernel/task/signal/signal_action.rs +++ b/src/kernel/task/signal/signal_action.rs @@ -1,22 +1,24 @@ -use super::{KResult, SAVED_DATA_SIZE}; -use crate::{ - io::BufferFill as _, - kernel::{ - constants::{EFAULT, EINVAL}, - syscall::UserMut, - user::UserBuffer, - }, -}; -use alloc::{collections::btree_map::BTreeMap, sync::Arc}; +use alloc::collections::btree_map::BTreeMap; +use alloc::sync::Arc; use core::arch::naked_asm; -use eonix_hal::{fpu::FpuState, traits::trap::RawTrapContext, trap::TrapContext}; + +use eonix_hal::fpu::FpuState; +use eonix_hal::traits::trap::RawTrapContext; +use eonix_hal::trap::TrapContext; use eonix_mm::address::{Addr as _, AddrOps as _, VAddr}; use eonix_sync::Spin; -use posix_types::{ - ctypes::Long, - signal::{SigAction, SigActionHandler, SigActionRestorer, SigSet, Signal, TryFromSigAction}, - SIGNAL_NOW, +use posix_types::ctypes::Long; +use posix_types::signal::{ + SigAction, SigActionHandler, SigActionRestorer, SigSet, Signal, + TryFromSigAction, }; +use posix_types::SIGNAL_NOW; + +use super::{KResult, SAVED_DATA_SIZE}; +use crate::io::BufferFill as _; +use crate::kernel::constants::{EFAULT, EINVAL}; +use crate::kernel::syscall::UserMut; +use crate::kernel::user::UserBuffer; #[cfg(target_arch = "x86_64")] #[unsafe(naked)] @@ -139,7 +141,9 @@ impl SignalAction { handler, restorer, .. } = self else { - unreachable!("Default and Ignore actions should not be handled here"); + unreachable!( + "Default and Ignore actions should not be handled here" + ); }; let current_sp = VAddr::from(trap_ctx.get_stack_pointer()); @@ -167,7 +171,9 @@ impl SignalAction { target_arch = "riscv64", target_arch = "loongarch64" )))] - compile_error!("`vdso_sigreturn` is not implemented for this architecture"); + compile_error!( + "`vdso_sigreturn` is not implemented for this architecture" + ); #[cfg(target_arch = "x86_64")] { @@ -178,19 +184,22 @@ impl SignalAction { unsafe { // SAFETY: To prevent the compiler from optimizing this into `la` instructions // and causing a linking error. 
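// The trick in isolation: routing the symbol through a static reference and
// a volatile read forces an absolute relocation instead of a pc-relative
// `la` pair that could fail to link. Sketch with a hypothetical `some_sym`:
//
//     unsafe extern "C" { fn some_sym(); }
//     static A: &'static unsafe extern "C" fn() = &(some_sym as unsafe extern "C" fn());
//     let addr = unsafe { (A as *const _ as *const usize).read_volatile() };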
- (VDSO_SIGRETURN_ADDR as *const _ as *const usize).read_volatile() + (VDSO_SIGRETURN_ADDR as *const _ as *const usize) + .read_volatile() } } #[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))] { - static VDSO_RT_SIGRETURN_ADDR: &'static unsafe extern "C" fn() = + static VDSO_RT_SIGRETURN_ADDR: + &'static unsafe extern "C" fn() = &(vdso_rt_sigreturn as unsafe extern "C" fn()); unsafe { // SAFETY: To prevent the compiler from optimizing this into `la` instructions // and causing a linking error. - (VDSO_RT_SIGRETURN_ADDR as *const _ as *const usize).read_volatile() + (VDSO_RT_SIGRETURN_ADDR as *const _ as *const usize) + .read_volatile() } } }; @@ -201,7 +210,8 @@ impl SignalAction { Some(return_address), &[Long::new_val(signal.into_raw() as _).get()], |vaddr, data| -> Result<(), u32> { - let mut buffer = UserBuffer::new(UserMut::new(vaddr), data.len())?; + let mut buffer = + UserBuffer::new(UserMut::new(vaddr), data.len())?; for ch in data.iter() { buffer.copy(&ch)?.ok_or(EFAULT)?; } From 4d272fe8b2d4a965c1db133febe3d5cda6dc77f4 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sat, 24 Jan 2026 02:07:08 +0800 Subject: [PATCH 21/25] riscv64, linker: make sure vdso lies inside .data With the current ldscript, linkers will put the vdso data after `__kernel_end`, which is buggy since we use that symbol to indicate the end of our kernel image, so newly allocated pages may overwrite those positions. Change by placing the vdso inside REGION_DATA. Remove the old VDSO memory region. Align the end of the .data section to a page-size boundary. Add a helper macro to retrieve .vdso section symbol addresses. Signed-off-by: greatbridf --- crates/eonix_hal/src/arch/riscv64/link.x | 4 ++- crates/eonix_hal/src/arch/riscv64/memory.x | 1 - crates/eonix_hal/src/link.x.in | 4 +-- src/kernel/task/signal/signal_action.rs | 31 +++++++++------------- 4 files changed, 17 insertions(+), 23 deletions(-) diff --git a/crates/eonix_hal/src/arch/riscv64/link.x b/crates/eonix_hal/src/arch/riscv64/link.x index e348e1be..a74f0d0d 100644 --- a/crates/eonix_hal/src/arch/riscv64/link.x +++ b/crates/eonix_hal/src/arch/riscv64/link.x @@ -81,10 +81,12 @@ INSERT AFTER .rodata; SECTIONS { .vdso ALIGN(0x1000) : ALIGN(0x1000) { + VDSO_START = ABSOLUTE(.); + KEEP(*(.vdso .vdso.*)); . = ALIGN(0x1000); - } > VDSO AT> RAM + } > REGION_DATA AT> RAM VDSO_PADDR = LOADADDR(.vdso); } diff --git a/crates/eonix_hal/src/arch/riscv64/memory.x b/crates/eonix_hal/src/arch/riscv64/memory.x index 0dc7c4ff..f2029c9a 100644 --- a/crates/eonix_hal/src/arch/riscv64/memory.x +++ b/crates/eonix_hal/src/arch/riscv64/memory.x @@ -3,7 +3,6 @@ ENTRY(_start) MEMORY { RAM : org = 0x0000000080200000, len = 8M - VDSO : org = 0x00007f0000000000, len = 4K KBSS : org = 0xffffffff40000000, len = 2M KIMAGE : org = 0xffffffff80200000, len = 8M } diff --git a/crates/eonix_hal/src/link.x.in b/crates/eonix_hal/src/link.x.in index 81c269c2..eaabdfda 100644 --- a/crates/eonix_hal/src/link.x.in +++ b/crates/eonix_hal/src/link.x.in @@ -18,7 +18,7 @@ SECTIONS { __srodata = .; *(.rodata .rodata.*); - + .
= ALIGN(8); PROVIDE(__eh_frame = .); @@ -41,7 +41,7 @@ } > REGION_DATA AT> LINK_REGION_DATA - .data.after : + .data.after : ALIGN(0x1000) { __data_after = .; } > REGION_DATA AT> LINK_REGION_DATA diff --git a/src/kernel/task/signal/signal_action.rs b/src/kernel/task/signal/signal_action.rs index cbf81935..18348c32 100644 --- a/src/kernel/task/signal/signal_action.rs +++ b/src/kernel/task/signal/signal_action.rs @@ -20,6 +20,16 @@ use crate::kernel::constants::{EFAULT, EINVAL}; use crate::kernel::syscall::UserMut; use crate::kernel::user::UserBuffer; +macro_rules! vdso_sym_addr { + ($sym:expr) => {{ + const VDSO_START_VADDR: VAddr = VAddr::from(0x7f00_0000_0000); + let vdso_link_start = eonix_hal::extern_symbol_addr!(VDSO_START); + + eonix_hal::symbol_addr!($sym) - vdso_link_start + + VDSO_START_VADDR.addr() + }}; +} + #[cfg(target_arch = "x86_64")] #[unsafe(naked)] #[unsafe(link_section = ".vdso.sigreturn")] @@ -178,29 +188,12 @@ impl SignalAction { #[cfg(target_arch = "x86_64")] { // TODO: Check and use `vdso_rt_sigreturn` for x86 as well. - static VDSO_SIGRETURN_ADDR: &'static unsafe extern "C" fn() = - &(vdso_rt_sigreturn as unsafe extern "C" fn()); - - unsafe { - // SAFETY: To prevent the compiler from optimizing this into `la` instructions - // and causing a linking error. - (VDSO_SIGRETURN_ADDR as *const _ as *const usize) - .read_volatile() - } + vdso_sym_addr!(vdso_rt_sigreturn) } #[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))] { - static VDSO_RT_SIGRETURN_ADDR: - &'static unsafe extern "C" fn() = - &(vdso_rt_sigreturn as unsafe extern "C" fn()); - - unsafe { - // SAFETY: To prevent the compiler from optimizing this into `la` instructions - // and causing a linking error. - (VDSO_RT_SIGRETURN_ADDR as *const _ as *const usize) - .read_volatile() - } + vdso_sym_addr!(vdso_rt_sigreturn) } }; From 5814c006a88ed2397b3e085e86e59c179f6cdcb2 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sat, 24 Jan 2026 03:08:28 +0800 Subject: [PATCH 22/25] riscv64: rewrite FDT and present free memory parsing Strip the memory used by the kernel and the FDT data out of the free memory blocks returned by the FDT. Closes: #54 ("Random kernel freezing on process creation / exiting") Fixes: 4351cf55739f3 ("partial work: fix riscv64 bootstrap") Signed-off-by: greatbridf --- .../eonix_hal/src/arch/riscv64/bootstrap.rs | 7 +- crates/eonix_hal/src/arch/riscv64/fdt.rs | 115 +++++++++++------- crates/eonix_hal/src/arch/riscv64/mm.rs | 36 +----- 3 files changed, 78 insertions(+), 80 deletions(-) diff --git a/crates/eonix_hal/src/arch/riscv64/bootstrap.rs b/crates/eonix_hal/src/arch/riscv64/bootstrap.rs index d317e448..ccb32527 100644 --- a/crates/eonix_hal/src/arch/riscv64/bootstrap.rs +++ b/crates/eonix_hal/src/arch/riscv64/bootstrap.rs @@ -29,7 +29,7 @@ use super::time::set_next_timer; use crate::arch::cpu::CPU; use crate::arch::fdt::{init_dtb_and_fdt, FdtExt, FDT}; use crate::arch::mm::{ - ArchPagingMode, ArchPhysAccess, FreeRam, PageAccessImpl, PageAttribute64, + ArchPagingMode, ArchPhysAccess, PageAccessImpl, PageAttribute64, RawPageTableSv48, GLOBAL_PAGE_TABLE, }; use crate::bootstrap::BootStrapData; use crate::mm::{ ArchMemory, BasicPageAlloc, BasicPageAllocRef, ScopedAllocator, }; @@ -113,13 +113,12 @@ unsafe extern "C" fn _start(hart_id: usize, dtb_addr: usize) { } pub unsafe extern "C" fn riscv64_start(hart_id: usize, dtb_addr: PAddr) -> !
{ - let fdt = Fdt::from_ptr(ArchPhysAccess::as_ptr(dtb_addr).as_ptr()) - .expect("Failed to parse DTB from static memory."); + let fdt = unsafe { FdtExt::new(dtb_addr) }; let real_allocator = RefCell::new(BasicPageAlloc::new()); let alloc = BasicPageAllocRef::new(&real_allocator); - for range in fdt.present_ram().free_ram() { + for range in fdt.free_ram() { real_allocator.borrow_mut().add_range(range); } diff --git a/crates/eonix_hal/src/arch/riscv64/fdt.rs b/crates/eonix_hal/src/arch/riscv64/fdt.rs index 5efcc98d..908256c7 100644 --- a/crates/eonix_hal/src/arch/riscv64/fdt.rs +++ b/crates/eonix_hal/src/arch/riscv64/fdt.rs @@ -1,62 +1,95 @@ -use super::mm::{ArchPhysAccess, PresentRam}; -use crate::arch::riscv64::config::mm::KIMAGE_OFFSET; -use core::sync::atomic::{AtomicPtr, Ordering}; -use eonix_mm::address::{PAddr, PRange, PhysAccess}; +use core::ops::Deref; +use core::sync::atomic::{AtomicPtr, AtomicUsize, Ordering}; + +use eonix_mm::address::{Addr, AddrOps, PAddr, PRange, PhysAccess}; use eonix_sync_base::LazyLock; use fdt::Fdt; -static DTB_VIRT_PTR: AtomicPtr = AtomicPtr::new(core::ptr::null_mut()); -pub static FDT: LazyLock> = LazyLock::new(|| unsafe { - Fdt::from_ptr(DTB_VIRT_PTR.load(Ordering::Acquire)) - .expect("Failed to parse DTB from static memory.") +use super::mm::ArchPhysAccess; +use crate::arch::riscv64::config::mm::KIMAGE_OFFSET; +use crate::extern_symbol_addr; + +static DTB_PADDR: AtomicUsize = AtomicUsize::new(0); +pub static FDT: LazyLock = LazyLock::new(|| unsafe { + FdtExt::new(PAddr::from_val(DTB_PADDR.load(Ordering::Relaxed))) }); -pub trait FdtExt { - fn harts(&self) -> impl Iterator; +pub struct FdtExt { + fdt: Fdt<'static>, + range: PRange, +} - fn hart_count(&self) -> usize { - self.harts().count() - } +impl FdtExt { + /// # Safety + /// The caller MUST ensure that [`addr`] points to valid FDT. 
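// Intended bootstrap usage, mirroring `riscv64_start` above (a sketch;
// `alloc` stands for the early BasicPageAlloc):
//
//     let fdt = unsafe { FdtExt::new(dtb_addr) }; // dtb_addr handed over by firmware
//     for range in fdt.free_ram() {
//         alloc.add_range(range); // present RAM minus kernel image and FDT blob
//     }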
+ pub unsafe fn new(addr: PAddr) -> Self { + let fdt = unsafe { + Fdt::from_ptr(ArchPhysAccess::as_ptr(addr).as_ptr()) + .expect("Failed to parse DTB from static memory.") + }; - fn present_ram(&self) -> impl Iterator; -} + Self { + range: PRange::from(addr).grow(fdt.total_size()), + fdt, + } + } -impl FdtExt for Fdt<'_> { - fn harts(&self) -> impl Iterator { + pub fn harts(&self) -> impl Iterator { self.cpus().map(|cpu| cpu.ids().all()).flatten() } - fn present_ram(&self) -> impl Iterator + PresentRam { - struct Present(I); - impl PresentRam for Present where I: Iterator {} - impl Iterator for Present - where - I: Iterator, - { - type Item = PRange; - - fn next(&mut self) -> Option { - self.0.next() - } - } + pub fn hart_count(&self) -> usize { + self.harts().count() + } + pub fn present_ram(&self) -> impl Iterator { let mut index = 0; - Present(core::iter::from_fn(move || { - self.memory() + + core::iter::from_fn(move || { + let item = self + .memory() .regions() .filter_map(|region| { - region.size.map(|len| { - PRange::from(PAddr::from(region.starting_address as usize)).grow(len) - }) + let start = PAddr::from(region.starting_address as usize); + Some(start).zip(region.size) }) - .skip(index) - .next() - .inspect(|_| index += 1) - })) + .map(|(start, len)| PRange::from(start).grow(len)) + .nth(index); + + index += 1; + item + }) + } + + pub fn free_ram(&self) -> impl Iterator { + let kernel_end = extern_symbol_addr!(__kernel_end) - KIMAGE_OFFSET; + let kernel_end = PAddr::from(kernel_end).ceil(); + + // TODO: move this to some platform-specific crate + self.present_ram().map(move |mut range| { + // Strip out parts before __kernel_end + if range.overlap_with(&PRange::from(kernel_end)) { + (_, range) = range.split_at(kernel_end); + } + + // Strip out part after the FDT + if range.overlap_with(&self.range) { + (range, _) = range.split_at(self.range.start()); + } + + range + }) + } +} + +impl Deref for FdtExt { + type Target = Fdt<'static>; + + fn deref(&self) -> &Self::Target { + &self.fdt } } pub unsafe fn init_dtb_and_fdt(dtb_paddr: PAddr) { - let dtb_virt_ptr = ArchPhysAccess::as_ptr(dtb_paddr); - DTB_VIRT_PTR.store(dtb_virt_ptr.as_ptr(), Ordering::Release); + DTB_PADDR.store(dtb_paddr.addr(), Ordering::Relaxed); } diff --git a/crates/eonix_hal/src/arch/riscv64/mm.rs b/crates/eonix_hal/src/arch/riscv64/mm.rs index 6362cdca..7891f094 100644 --- a/crates/eonix_hal/src/arch/riscv64/mm.rs +++ b/crates/eonix_hal/src/arch/riscv64/mm.rs @@ -275,46 +275,12 @@ impl Memory for ArchMemory { } fn free_ram() -> impl Iterator { - FDT.present_ram().free_ram() + FDT.free_ram() } } pub type DefaultPagingMode = PagingModeSv48; -pub trait PresentRam: Iterator {} - -pub trait FreeRam: PresentRam { - fn free_ram(self) -> impl Iterator; -} - -impl FreeRam for T -where - T: PresentRam, -{ - fn free_ram(self) -> impl Iterator { - let kernel_end = extern_symbol_addr!(__kernel_end) - KIMAGE_OFFSET; - let kernel_end = PAddr::from(kernel_end).ceil(); - - let paddr_after_kimage_aligned = kernel_end.ceil_to(0x200000); - - core::iter::once(PRange::new(kernel_end, paddr_after_kimage_aligned)) - .chain( - self.filter(move |range| { - range.end() > paddr_after_kimage_aligned - }) - .map(move |range| { - if range.start() < paddr_after_kimage_aligned { - let (_, right) = - range.split_at(paddr_after_kimage_aligned); - right - } else { - range - } - }), - ) - } -} - #[inline(always)] pub fn flush_tlb(vaddr: usize) { sfence_vma(0, vaddr); From 20b12dfa221a87471f2fe49a3f5e7730bce3c2dc Mon Sep 17 00:00:00 2001 From: 
greatbridf Date: Sat, 24 Jan 2026 03:12:29 +0800 Subject: [PATCH 23/25] sysinit: pointee type should be u8 when using ptr::write_bytes With a `*mut ()` pointee, no bytes are written at the destination, which might result in uninitialized memory access. Fixes: ebd3d1224c01 ("change(x86): optimize bootstrap code, remove kinit.cpp") Fixes: 191877a3acd0 ("feat(hal): impl basic single hart bootstrap for riscv64") Signed-off-by: greatbridf --- crates/eonix_hal/src/arch/riscv64/bootstrap.rs | 2 +- src/kernel_init.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/eonix_hal/src/arch/riscv64/bootstrap.rs b/crates/eonix_hal/src/arch/riscv64/bootstrap.rs index ccb32527..b0c235aa 100644 --- a/crates/eonix_hal/src/arch/riscv64/bootstrap.rs +++ b/crates/eonix_hal/src/arch/riscv64/bootstrap.rs @@ -186,7 +186,7 @@ fn setup_kernel_page_table(alloc: BasicPageAllocRef) { unsafe { core::ptr::write_bytes( - KERNEL_BSS_START.addr() as *mut (), + KERNEL_BSS_START.addr() as *mut u8, 0, bss_length, ); } diff --git a/src/kernel_init.rs b/src/kernel_init.rs index 64c220b2..2259f6cf 100644 --- a/src/kernel_init.rs +++ b/src/kernel_init.rs @@ -41,7 +41,7 @@ fn setup_kernel_page_array(alloc: BasicPageAllocRef, count_pages: usize) { unsafe { // SAFETY: We've just mapped the area with sufficient length. core::ptr::write_bytes( - V_KERNEL_PAGE_ARRAY_START.addr() as *mut (), + V_KERNEL_PAGE_ARRAY_START.addr() as *mut u8, 0, count_pages * PAGE_SIZE, ); From 74c7e0a73628697e9a7fa8d06fa82091dcdf25b4 Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sat, 24 Jan 2026 03:21:52 +0800 Subject: [PATCH 24/25] style: reformat file Signed-off-by: greatbridf --- crates/eonix_hal/src/mm.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/crates/eonix_hal/src/mm.rs b/crates/eonix_hal/src/mm.rs index c4b9bb74..c6c6a369 100644 --- a/crates/eonix_hal/src/mm.rs +++ b/crates/eonix_hal/src/mm.rs @@ -7,8 +7,8 @@ use eonix_mm::page_table::PageTableAlloc; use eonix_mm::paging::{BasicFolio, FrameAlloc, PAGE_SIZE, PFN}; pub use crate::arch::mm::{ - flush_tlb, flush_tlb_all, get_root_page_table_pfn, set_root_page_table_pfn, ArchMemory, - ArchPhysAccess, GLOBAL_PAGE_TABLE, + flush_tlb, flush_tlb_all, get_root_page_table_pfn, set_root_page_table_pfn, + ArchMemory, ArchPhysAccess, GLOBAL_PAGE_TABLE, }; pub struct BasicPageAlloc { @@ -87,7 +87,8 @@ impl BasicPageAlloc { panic!("Page allocator is full"); } - self.ranges[tail] = Some(PRange::new(range.start().ceil(), range.end().floor())); + self.ranges[tail] = + Some(PRange::new(range.start().ceil(), range.end().floor())); } pub fn alloc(&mut self, order: u32) -> PFN { @@ -147,7 +148,10 @@ impl<'a> ScopedAllocator<'a> { } } - pub fn with_alloc<'b, 'r, O>(&'r self, func: impl FnOnce(&'b ScopedAllocator<'a>) -> O) -> O + pub fn with_alloc<'b, 'r, O>( + &'r self, + func: impl FnOnce(&'b ScopedAllocator<'a>) -> O, + ) -> O where 'a: 'b, 'r: 'b, From 6a0f7033a51c53f3985dfeed42b792b7a0e69e3d Mon Sep 17 00:00:00 2001 From: greatbridf Date: Sat, 24 Jan 2026 03:24:11 +0800 Subject: [PATCH 25/25] hal, mm: alloc basic folios from low to high addr This can shorten the QEMU memory map.
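Presumably the win comes from `alloc_one` handing out pages from the bottom of the current range instead of the top, so early boot allocations form one contiguous low block rather than dirtying the tail of every range. A minimal sketch of the new pop-from-the-front behavior (types elided, mirroring the diff below):

    let pfn = PFN::from(range.start());
    range = PRange::new(range.start() + PAGE_SIZE, range.end());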
Signed-off-by: greatbridf --- crates/eonix_hal/src/mm.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crates/eonix_hal/src/mm.rs b/crates/eonix_hal/src/mm.rs index c6c6a369..ef006cb5 100644 --- a/crates/eonix_hal/src/mm.rs +++ b/crates/eonix_hal/src/mm.rs @@ -41,9 +41,8 @@ impl BasicPageAlloc { fn alloc_one(&mut self) -> PFN { assert_ne!(self.head, self.tail, "No free pages available"); let mut range = self.ranges[self.head].take().unwrap(); - range = range.shrink(PAGE_SIZE); - - let pfn = PFN::from(range.end()); + let pfn = PFN::from(range.start()); + range = PRange::new(range.start() + PAGE_SIZE, range.end()); if range.len() != 0 { self.ranges[self.head] = Some(range);
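// A quick illustration of the new low-to-high order (a sketch with
// hypothetical, page-aligned addresses; assumes `alloc(0)` hands out a
// single 4 KiB page via `alloc_one`):
//
//     let mut a = BasicPageAlloc::new();
//     a.add_range(PRange::new(PAddr::from(0x8020_0000), PAddr::from(0x8020_3000)));
//     assert_eq!(a.alloc(0), PFN::from(PAddr::from(0x8020_0000))); // lowest page first
//     assert_eq!(a.alloc(0), PFN::from(PAddr::from(0x8020_1000))); // then the next one up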