From d02a51c5d1f810053586b5798f4f6936d999058d Mon Sep 17 00:00:00 2001 From: branchseer Date: Fri, 21 Nov 2025 15:53:13 +0800 Subject: [PATCH] refactor: replace NativeString with Box --- Cargo.lock | 14 ++ crates/fspy/src/unix/mod.rs | 6 +- crates/fspy_preload_unix/src/client/mod.rs | 8 +- crates/fspy_shared/Cargo.toml | 2 +- crates/fspy_shared/src/ipc/channel/mod.rs | 6 +- crates/fspy_shared/src/ipc/mod.rs | 8 +- crates/fspy_shared/src/ipc/native_str.rs | 246 +++++++++------------ crates/fspy_shared_unix/src/exec/mod.rs | 5 +- crates/fspy_shared_unix/src/payload.rs | 9 +- crates/fspy_shared_unix/src/spawn/mod.rs | 10 +- 10 files changed, 154 insertions(+), 160 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3e141b1a..bf87d8d3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -307,6 +307,20 @@ name = "bytemuck" version = "1.23.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3995eaeebcdf32f91f980d360f78732ddc061097ab4e39991ae7a6ace9194677" +dependencies = [ + "bytemuck_derive", +] + +[[package]] +name = "bytemuck_derive" +version = "1.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] [[package]] name = "bytes" diff --git a/crates/fspy/src/unix/mod.rs b/crates/fspy/src/unix/mod.rs index 03bb0f13..0c6da788 100644 --- a/crates/fspy/src/unix/mod.rs +++ b/crates/fspy/src/unix/mod.rs @@ -8,7 +8,7 @@ use std::{io, path::Path}; #[cfg(target_os = "linux")] use fspy_seccomp_unotify::supervisor::supervise; -use fspy_shared::ipc::{NativeString, PathAccess, channel::channel}; +use fspy_shared::ipc::{NativeStr, PathAccess, channel::channel}; #[cfg(target_os = "macos")] use fspy_shared_unix::payload::Artifacts; use fspy_shared_unix::{ @@ -28,12 +28,12 @@ use crate::{ ipc::{OwnedReceiverLockGuard, SHM_CAPACITY}, }; -#[derive(Debug, Clone)] +#[derive(Debug)] pub struct SpyImpl { #[cfg(target_os = "macos")] artifacts: Artifacts, - preload_path: NativeString, + preload_path: Box, } const PRELOAD_CDYLIB_BINARY: &[u8] = include_bytes!(env!("CARGO_CDYLIB_FILE_FSPY_PRELOAD_UNIX")); diff --git a/crates/fspy_preload_unix/src/client/mod.rs b/crates/fspy_preload_unix/src/client/mod.rs index 5be6dbd5..7a73c974 100644 --- a/crates/fspy_preload_unix/src/client/mod.rs +++ b/crates/fspy_preload_unix/src/client/mod.rs @@ -1,7 +1,9 @@ pub mod convert; pub mod raw_exec; -use std::{fmt::Debug, num::NonZeroUsize, sync::OnceLock}; +use std::{ + ffi::OsStr, fmt::Debug, num::NonZeroUsize, os::unix::ffi::OsStrExt as _, sync::OnceLock, +}; use bincode::{enc::write::SizeWriter, encode_into_slice, encode_into_writer}; use convert::{ToAbsolutePath, ToAccessMode}; @@ -55,7 +57,7 @@ impl Client { // ipc channel not available, skip sending return Ok(()); }; - let path = path_access.path.as_bstr(); + let path = path_access.path.as_os_str().as_bytes(); if path.starts_with(b"/dev/") || (cfg!(target_os = "linux") && (path.starts_with(b"/proc/") || path.starts_with(b"/sys/"))) @@ -101,7 +103,7 @@ impl Client { let Some(abs_path) = abs_path else { return Ok(Ok(())); }; - Ok(self.send(PathAccess { mode, path: abs_path.into() })) + Ok(self.send(PathAccess { mode, path: OsStr::from_bytes(abs_path).into() })) }) }??; diff --git a/crates/fspy_shared/Cargo.toml b/crates/fspy_shared/Cargo.toml index 5e53d1b7..5d00eef7 100644 --- a/crates/fspy_shared/Cargo.toml +++ b/crates/fspy_shared/Cargo.toml @@ -9,7 +9,7 @@ allocator-api2 = { workspace = true } bincode = { workspace = true } bitflags = { workspace = true } bstr = { workspace = true } -bytemuck = { workspace = true, features = ["must_cast"] } +bytemuck = { workspace = true, features = ["must_cast", "derive"] } shared_memory = { workspace = true, features = ["logging"] } thiserror = { workspace = true } tracing = { workspace = true } diff --git a/crates/fspy_shared/src/ipc/channel/mod.rs b/crates/fspy_shared/src/ipc/channel/mod.rs index be5a2b51..b6dfc085 100644 --- a/crates/fspy_shared/src/ipc/channel/mod.rs +++ b/crates/fspy_shared/src/ipc/channel/mod.rs @@ -11,12 +11,12 @@ use shm_io::{ShmReader, ShmWriter}; use tracing::debug; use uuid::Uuid; -use super::NativeString; +use super::NativeStr; /// Serializable configuration to create channel senders. #[derive(Encode, Decode, Clone, Debug)] pub struct ChannelConf { - lock_file_path: NativeString, + lock_file_path: Box, shm_id: Arc, shm_size: usize, } @@ -69,7 +69,7 @@ impl ChannelConf { pub struct Sender { writer: ShmWriter, - lock_file_path: NativeString, + lock_file_path: Box, lock_file: File, } diff --git a/crates/fspy_shared/src/ipc/mod.rs b/crates/fspy_shared/src/ipc/mod.rs index 825e0e7d..2b348caf 100644 --- a/crates/fspy_shared/src/ipc/mod.rs +++ b/crates/fspy_shared/src/ipc/mod.rs @@ -5,7 +5,7 @@ use std::fmt::Debug; use bincode::{BorrowDecode, Encode, config::Configuration}; use bitflags::bitflags; -pub use native_str::{NativeStr, NativeString}; +pub use native_str::NativeStr; pub const BINCODE_CONFIG: Configuration = bincode::config::standard(); @@ -35,16 +35,16 @@ impl Debug for AccessMode { #[derive(Encode, BorrowDecode, Debug, Clone, Copy)] pub struct PathAccess<'a> { pub mode: AccessMode, - pub path: NativeStr<'a>, + pub path: &'a NativeStr, // TODO: add follow_symlinks (O_NOFOLLOW) } impl<'a> PathAccess<'a> { - pub fn read(path: impl Into>) -> Self { + pub fn read(path: impl Into<&'a NativeStr>) -> Self { Self { mode: AccessMode::READ, path: path.into() } } - pub fn read_dir(path: impl Into>) -> Self { + pub fn read_dir(path: impl Into<&'a NativeStr>) -> Self { Self { mode: AccessMode::READ_DIR, path: path.into() } } } diff --git a/crates/fspy_shared/src/ipc/native_str.rs b/crates/fspy_shared/src/ipc/native_str.rs index 58d527b9..905d02f5 100644 --- a/crates/fspy_shared/src/ipc/native_str.rs +++ b/crates/fspy_shared/src/ipc/native_str.rs @@ -1,206 +1,178 @@ #[cfg(windows)] use std::ffi::OsString; +#[cfg(unix)] +use std::os::unix::ffi::OsStrExt as _; +#[cfg(windows)] +use std::os::windows::ffi::OsStrExt as _; +#[cfg(windows)] +use std::os::windows::ffi::OsStringExt as _; use std::{ borrow::Cow, ffi::OsStr, fmt::Debug, path::{Path, StripPrefixError}, - sync::Arc, }; use allocator_api2::alloc::Allocator; -use bincode::{BorrowDecode, Decode, Encode}; -#[cfg(unix)] -use bstr::BStr; - -/// Similar to `OsStr`, but requires zero-copy to construct from either wide characters on Windows. -#[derive(Encode, BorrowDecode, Clone, Copy, PartialEq, Eq)] -pub struct NativeStr<'a> { +use bincode::{ + BorrowDecode, Decode, Encode, + de::{BorrowDecoder, Decoder}, + error::DecodeError, + impl_borrow_decode, +}; +#[cfg(windows)] +use bytemuck::must_cast_slice; +use bytemuck::{TransparentWrapper, TransparentWrapperAlloc}; + +/// Similar to `OsStr`, but +/// - Can be infallibly and losslessly encoded/decoded using bincode. +/// (`Encode`/`Decoded` implementations for `OsStr` requires it to be valid UTF-8. This does not.) +/// - Can be constructed from wide characters on Windows with zero copy. +/// - Supports zero-copy `BorrowDecode`. +#[derive(TransparentWrapper, Encode, PartialEq, Eq)] +#[repr(transparent)] +pub struct NativeStr { // On unix, this is the raw bytes of the OsStr. // On windows, this is safely transmuted from `&[u16]` in `NativeStr::from_wide`. We don't declare it as `&[u16]` to allow `BorrowDecode`. // Transmuting back to `&[u16]` would be unsafe because of different alignments between `u8` and `u16` (See `to_os_string`). - data: &'a [u8], -} - -#[cfg(unix)] -impl<'a> From<&'a Path> for NativeStr<'a> { - fn from(value: &'a Path) -> Self { - use std::os::unix::ffi::OsStrExt as _; - Self::from_bytes(value.as_os_str().as_bytes()) - } -} - -#[cfg(unix)] -impl<'a> From<&'a str> for NativeStr<'a> { - #[cfg(unix)] - fn from(value: &'a str) -> Self { - Self::from_bytes(value.as_bytes()) - } + data: [u8], } -impl<'a> NativeStr<'a> { - pub fn clone_in<'new_alloc, A>(&self, alloc: &'new_alloc A) -> NativeStr<'new_alloc> - where - &'new_alloc A: Allocator, - { - use allocator_api2::vec::Vec; - let mut data = Vec::::with_capacity_in(self.data.len(), alloc); - data.extend_from_slice(self.data); - let data = data.leak::<'new_alloc>(); - NativeStr { data } - } - +impl NativeStr { #[cfg(unix)] #[must_use] - pub const fn from_bytes(bytes: &'a [u8]) -> Self { - Self { data: bytes } + pub fn from_bytes(bytes: &[u8]) -> &Self { + Self::wrap_ref(bytes) } #[cfg(windows)] - pub fn from_wide(wide: &'a [u16]) -> Self { - use bytemuck::must_cast_slice; - Self { data: must_cast_slice(wide) } + pub fn from_wide(wide: &[u16]) -> &Self { + Self::wrap_ref(must_cast_slice(wide)) } #[cfg(unix)] #[must_use] - pub fn as_os_str(&self) -> &'a OsStr { - std::os::unix::ffi::OsStrExt::from_bytes(self.data) - } - - #[cfg(unix)] - #[must_use] - pub fn as_bstr(&self) -> &'a BStr { - use bstr::ByteSlice; - - self.data.as_bstr() + pub fn as_os_str(&self) -> &OsStr { + OsStr::from_bytes(&self.data) } #[cfg(windows)] + #[must_use] pub fn to_os_string(&self) -> OsString { - use std::os::windows::ffi::OsStringExt; - use bytemuck::{allocation::pod_collect_to_vec, try_cast_slice}; - if let Ok(wide) = try_cast_slice::(self.data) { + if let Ok(wide) = try_cast_slice::(&self.data) { OsString::from_wide(wide) } else { - let wide = pod_collect_to_vec::(self.data); + let wide = pod_collect_to_vec::(&self.data); OsString::from_wide(&wide) } } #[must_use] - pub fn to_cow_os_str(&self) -> Cow<'a, OsStr> { + pub fn to_cow_os_str(&self) -> Cow<'_, OsStr> { #[cfg(windows)] return Cow::Owned(self.to_os_string()); #[cfg(unix)] return Cow::Borrowed(self.as_os_str()); } +} - pub fn strip_path_prefix, R, F: FnOnce(Result<&Path, StripPrefixError>) -> R>( - &self, - base: P, - f: F, - ) -> R { - let me = self.to_cow_os_str(); - let me = strip_windows_path_prefix(&me); - let base = strip_windows_path_prefix(base.as_ref().as_os_str()); - f(Path::new(me).strip_prefix(base)) +impl Debug for NativeStr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + ::fmt(self.to_cow_os_str().as_ref(), f) } } -/// Strip the `\\?\`, `\\.\`, `\??\` prefix from a Windows path, if present. -/// Does nothing on non-Windows platforms. -/// -/// \\?\ and \\.\ are used to enable long paths and access to device paths. -/// \??\ is used in Nt* calls. -/// The resulting path is not necessarily valid or points to the same location, -/// but it's good enough for sanitizing paths in `NativeStr::strip_path_prefix`. -fn strip_windows_path_prefix(p: &OsStr) -> &OsStr { - #[cfg(windows)] - { - use os_str_bytes::OsStrBytesExt as _; - for prefix in [r"\\?\", r"\\.\", r"\??\"] { - if let Some(stripped) = p.strip_prefix(prefix) { - return stripped; - } - } - p - } - #[cfg(not(windows))] - { - p +impl<'a, C> BorrowDecode<'a, C> for &'a NativeStr { + fn borrow_decode>( + decoder: &mut D, + ) -> Result { + let data: &'a [u8] = BorrowDecode::borrow_decode(decoder)?; + Ok(NativeStr::wrap_ref(data)) } } #[cfg(unix)] -impl<'a> From<&'a BStr> for NativeStr<'a> { - fn from(value: &'a BStr) -> Self { - Self::from_bytes(value) +impl<'a, S: AsRef + ?Sized> From<&'a S> for &'a NativeStr { + fn from(value: &'a S) -> Self { + NativeStr::from_bytes(value.as_ref().as_bytes()) } } -impl Debug for NativeStr<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - ::fmt(self.to_cow_os_str().as_ref(), f) +impl Decode for Box { + fn decode>(decoder: &mut D) -> Result { + let data: Box<[u8]> = Decode::decode(decoder)?; + Ok(NativeStr::wrap_box(data)) } } +impl_borrow_decode!(Box); -/// Similar to `OsString`, but can be losslessly encoded/decoded using bincode. -/// `Encode`/`Decoded` implementations for `OsString` requires it to be valid UTF-8. This does not. -#[derive(Encode, Decode, Clone, Hash)] -pub struct NativeString { - #[cfg(unix)] - data: Arc<[u8]>, - #[cfg(windows)] - data: Arc<[u16]>, +impl Clone for Box { + fn clone(&self) -> Self { + NativeStr::wrap_box(self.data.into()) + } } -impl NativeString { +impl> From for Box { #[cfg(unix)] - pub fn as_os_str(&self) -> &OsStr { - use std::os::unix::ffi::OsStrExt as _; - OsStr::from_bytes(&self.data) + fn from(value: S) -> Self { + NativeStr::wrap_box(value.as_ref().as_bytes().into()) } #[cfg(windows)] - pub fn to_os_string(&self) -> OsString { - use std::os::windows::ffi::OsStringExt as _; - OsString::from_wide(&self.data) - } - - pub fn to_cow_os_str(&self) -> Cow<'_, OsStr> { - #[cfg(unix)] - return Cow::Borrowed(self.as_os_str()); - #[cfg(windows)] - return Cow::Owned(self.to_os_string()); - } -} - -impl<'a> Debug for NativeString { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - ::fmt(&self.to_cow_os_str(), f) + fn from(value: S) -> Self { + let wide: Vec = value.as_ref().encode_wide().collect(); + let data: &[u8] = must_cast_slice(&wide); + NativeStr::wrap_box(data.into()) } } -impl<'a> From<&'a OsStr> for NativeString { - #[cfg(unix)] - fn from(value: &'a OsStr) -> Self { - use std::os::unix::ffi::OsStrExt as _; - Self { data: value.as_bytes().into() } +impl NativeStr { + pub fn clone_in<'new_alloc, A>(&self, alloc: &'new_alloc A) -> &'new_alloc NativeStr + where + &'new_alloc A: Allocator, + { + use allocator_api2::vec::Vec; + let mut data = Vec::::with_capacity_in(self.data.len(), alloc); + data.extend_from_slice(&self.data); + let data = data.leak::<'new_alloc>(); + NativeStr::wrap_ref(data) } - #[cfg(windows)] - fn from(value: &'a OsStr) -> Self { - use std::os::windows::ffi::OsStrExt as _; - Self { data: value.encode_wide().collect() } - } -} + pub fn strip_path_prefix, R, F: FnOnce(Result<&Path, StripPrefixError>) -> R>( + &self, + base: P, + f: F, + ) -> R { + /// Strip the `\\?\`, `\\.\`, `\??\` prefix from a Windows path, if present. + /// Does nothing on non-Windows platforms. + /// + /// \\?\ and \\.\ are used to enable long paths and access to device paths. + /// \??\ is used in Nt* calls. + /// The resulting path is not necessarily valid or points to the same location, + /// but it's good enough for sanitizing paths in `NativeStr::strip_path_prefix`. + fn strip_windows_path_prefix(p: &OsStr) -> &OsStr { + #[cfg(windows)] + { + use os_str_bytes::OsStrBytesExt as _; + for prefix in [r"\\?\", r"\\.\", r"\??\"] { + if let Some(stripped) = p.strip_prefix(prefix) { + return stripped; + } + } + p + } + #[cfg(not(windows))] + { + p + } + } -impl<'a> From<&'a std::path::Path> for NativeString { - fn from(value: &'a std::path::Path) -> Self { - value.as_os_str().into() + let me = self.to_cow_os_str(); + let me = strip_windows_path_prefix(&me); + let base = strip_windows_path_prefix(base.as_ref().as_os_str()); + f(Path::new(me).strip_prefix(base)) } } @@ -222,7 +194,7 @@ mod tests { let mut encoded = encode_to_vec(native_str, config::standard()).unwrap(); let (decoded, _) = - borrow_decode_from_slice::<'_, NativeStr<'_>, _>(&encoded, config::standard()).unwrap(); + borrow_decode_from_slice::<'_, &NativeStr, _>(&encoded, config::standard()).unwrap(); let decoded_wide = decoded.to_os_string().encode_wide().collect::>(); assert_eq!(decoded_wide, wide_str); @@ -231,7 +203,7 @@ mod tests { encoded.copy_within(..encoded_len, 1); let (decoded, _) = - borrow_decode_from_slice::<'_, NativeStr<'_>, _>(&encoded[1..], config::standard()) + borrow_decode_from_slice::<'_, &NativeStr, _>(&encoded[1..], config::standard()) .unwrap(); let decoded_wide = decoded.to_os_string().encode_wide().collect::>(); assert_eq!(decoded_wide, wide_str); diff --git a/crates/fspy_shared_unix/src/exec/mod.rs b/crates/fspy_shared_unix/src/exec/mod.rs index 2afba8bf..341e610b 100644 --- a/crates/fspy_shared_unix/src/exec/mod.rs +++ b/crates/fspy_shared_unix/src/exec/mod.rs @@ -113,7 +113,10 @@ impl Exec { self.program.as_ref(), path, |path| { - on_path_access(PathAccess { path: path.into(), mode: AccessMode::READ }); + on_path_access(PathAccess { + path: OsStr::from_bytes(path).into(), + mode: AccessMode::READ, + }); access(OsStr::from_bytes(path), AccessFlags::X_OK) }, |program| Ok(program.to_owned()), diff --git a/crates/fspy_shared_unix/src/payload.rs b/crates/fspy_shared_unix/src/payload.rs index c0de11ec..45190c9a 100644 --- a/crates/fspy_shared_unix/src/payload.rs +++ b/crates/fspy_shared_unix/src/payload.rs @@ -3,13 +3,13 @@ use std::os::unix::ffi::OsStringExt; use base64::{Engine as _, prelude::BASE64_STANDARD_NO_PAD}; use bincode::{Decode, Encode, config::standard}; use bstr::BString; -use fspy_shared::ipc::{NativeString, channel::ChannelConf}; +use fspy_shared::ipc::{NativeStr, channel::ChannelConf}; #[derive(Debug, Encode, Decode)] pub struct Payload { pub ipc_channel_conf: ChannelConf, - pub preload_path: NativeString, + pub preload_path: Box, #[cfg(target_os = "macos")] pub artifacts: Artifacts, @@ -21,9 +21,8 @@ pub struct Payload { #[cfg(target_os = "macos")] #[derive(Debug, Encode, Decode, Clone)] pub struct Artifacts { - pub bash_path: NativeString, - pub coreutils_path: NativeString, - // pub interpose_cdylib_path: NativeString, + pub bash_path: Box, + pub coreutils_path: Box, } pub(crate) const PAYLOAD_ENV_NAME: &str = "FSPY_PAYLOAD"; diff --git a/crates/fspy_shared_unix/src/spawn/mod.rs b/crates/fspy_shared_unix/src/spawn/mod.rs index 9e2ad1ed..6b36fcdb 100644 --- a/crates/fspy_shared_unix/src/spawn/mod.rs +++ b/crates/fspy_shared_unix/src/spawn/mod.rs @@ -6,7 +6,8 @@ mod os_specific; #[path = "./macos.rs"] mod os_specific; -use bstr::ByteSlice; +use std::{ffi::OsStr, os::unix::ffi::OsStrExt}; + use fspy_shared::ipc::{AccessMode, PathAccess}; #[doc(hidden)] #[cfg(target_os = "macos")] @@ -40,7 +41,7 @@ pub fn handle_exec( mut on_path_access: impl FnMut(PathAccess<'_>), ) -> nix::Result> { let mut on_path_access = |path_access: PathAccess<'_>| { - if path_access.path.as_bstr().first() == Some(&b'/') { + if path_access.path.as_os_str().as_bytes().first() == Some(&b'/') { on_path_access(path_access); } else { let path = @@ -50,7 +51,10 @@ pub fn handle_exec( }; command.resolve(&mut on_path_access, config)?; - on_path_access(PathAccess { mode: AccessMode::READ, path: command.program.as_bstr().into() }); + on_path_access(PathAccess { + mode: AccessMode::READ, + path: OsStr::from_bytes(&command.program).into(), + }); os_specific::handle_exec(command, encoded_payload) }