Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions crates/openshell-driver-vm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ By default `mise run gateway:vm`:
- Registers the CLI gateway `vm-dev` by writing `~/.config/openshell/gateways/vm-dev/metadata.json`. It does not modify the workspace `.env`.
- Persists the gateway SQLite DB under `.cache/gateway-vm/gateway.db`.
- Places the VM driver state (per-sandbox `overlay.ext4`, image cache, and `run/compute-driver.sock`) under `/tmp/openshell-vm-driver-$USER-vm-dev/` so the AF_UNIX socket path stays under macOS `SUN_LEN`.
- Starts development sandboxes with 4 vCPUs, 8192 MiB RAM, and a 32768 MiB sparse writable overlay disk.
- Writes `.cache/gateway-vm/gateway.toml` with `[openshell.drivers.vm].driver_dir = "$PWD/target/debug"` so the freshly built `openshell-driver-vm` is used instead of an older installed copy from `~/.local/libexec/openshell`, `/usr/libexec/openshell`, or `/usr/local/libexec`.

For GPU passthrough (VFIO), pass `-- --gpu` and run with root privileges:
Expand Down Expand Up @@ -76,6 +77,12 @@ mise run gateway:vm
# custom sandbox image
OPENSHELL_SANDBOX_IMAGE=ghcr.io/example/sandbox:latest mise run gateway:vm

# custom sandbox VM size
OPENSHELL_VM_DRIVER_VCPUS=6 \
OPENSHELL_VM_DRIVER_MEM_MIB=12288 \
OPENSHELL_VM_OVERLAY_DISK_MIB=32768 \
mise run gateway:vm

# custom bootstrap image for the VM runtime used to prepare/boot target images
OPENSHELL_VM_BOOTSTRAP_IMAGE=ghcr.io/example/bootstrap:latest mise run gateway:vm
```
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,10 @@ setup_overlay_root() {
if [ "${OPENSHELL_VM_INIT_MODE:-sandbox}" = "image-prep" ]; then
prepare_guest_image_rootfs
sync
if ! umount /overlay 2>/dev/null; then
ts "WARNING: failed to unmount image-prep disk cleanly"
fi
sync
ts "image-prep complete"
exit 0
fi
Expand Down
49 changes: 33 additions & 16 deletions crates/openshell-driver-vm/src/driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ use crate::gpu::{
};
use crate::rootfs::{
clone_or_copy_sparse_file, create_ext4_image_from_dir_with_size, create_rootfs_image_from_dir,
extract_rootfs_archive_to, prepare_sandbox_rootfs_from_image_root, sandbox_guest_init_path,
embedded_rootfs_payload_identity, extract_rootfs_archive_to,
prepare_sandbox_rootfs_from_image_root, repair_ext4_image, sandbox_guest_init_path,
set_rootfs_image_file_mode, write_rootfs_image_file,
};
use bollard::Docker;
Expand Down Expand Up @@ -1317,7 +1318,10 @@ impl VmDriver {
image_identity = %source_image_identity,
"vm driver: manifest digest resolved"
);
let image_identity = bootstrap_image_cache_identity(&source_image_identity);
let image_identity = bootstrap_image_cache_identity(
&source_image_identity,
&embedded_rootfs_payload_identity(),
);
let image_path = image_cache_rootfs_image(&self.config.state_dir, &image_identity);

// Emit a driver progress hint for cache hits too and immediately
Expand Down Expand Up @@ -1498,7 +1502,8 @@ impl VmDriver {
docker: &Docker,
image_identity: &str,
) -> Result<String, Status> {
let cache_identity = bootstrap_image_cache_identity(image_identity);
let cache_identity =
bootstrap_image_cache_identity(image_identity, &embedded_rootfs_payload_identity());
let image_path = image_cache_rootfs_image(&self.config.state_dir, &cache_identity);

self.publish_platform_event(
Expand Down Expand Up @@ -1605,7 +1610,8 @@ impl VmDriver {
image_identity: &str,
bootstrap_root_disk: &Path,
) -> Result<PreparedImageDisk, Status> {
let cache_identity = prepared_image_cache_identity(image_identity);
let cache_identity =
prepared_image_cache_identity(image_identity, &embedded_rootfs_payload_identity());
let image_path = image_cache_rootfs_image(&self.config.state_dir, &cache_identity);

if tokio::fs::metadata(&image_path).await.is_ok() {
Expand Down Expand Up @@ -1714,7 +1720,10 @@ impl VmDriver {
"failed to resolve vm sandbox image '{image_ref}': {err}"
))
})?;
let cache_identity = prepared_image_cache_identity(&source_image_identity);
let cache_identity = prepared_image_cache_identity(
&source_image_identity,
&embedded_rootfs_payload_identity(),
);
let image_path = image_cache_rootfs_image(&self.config.state_dir, &cache_identity);

if tokio::fs::metadata(&image_path).await.is_ok() {
Expand Down Expand Up @@ -1904,6 +1913,11 @@ impl VmDriver {
let _ = tokio::fs::remove_dir_all(staging_dir).await;
return Err(err);
}
let prepared_image_for_repair = prepared_image.clone();
tokio::task::spawn_blocking(move || repair_ext4_image(&prepared_image_for_repair))
.await
.map_err(|err| Status::internal(format!("prepared image repair panicked: {err}")))?
.map_err(Status::failed_precondition)?;

if tokio::fs::metadata(&image_path).await.is_ok() {
let _ = tokio::fs::remove_dir_all(staging_dir).await;
Expand Down Expand Up @@ -3724,12 +3738,12 @@ fn write_oci_layout_for_manifest(
Ok(())
}

fn bootstrap_image_cache_identity(image_identity: &str) -> String {
format!("{BOOTSTRAP_IMAGE_CACHE_LAYOUT_VERSION}:{image_identity}")
fn bootstrap_image_cache_identity(image_identity: &str, rootfs_payload_identity: &str) -> String {
format!("{BOOTSTRAP_IMAGE_CACHE_LAYOUT_VERSION}:{rootfs_payload_identity}:{image_identity}")
}

fn prepared_image_cache_identity(image_identity: &str) -> String {
format!("{PREPARED_IMAGE_CACHE_LAYOUT_VERSION}:{image_identity}")
fn prepared_image_cache_identity(image_identity: &str, rootfs_payload_identity: &str) -> String {
format!("{PREPARED_IMAGE_CACHE_LAYOUT_VERSION}:{rootfs_payload_identity}:{image_identity}")
}

fn registry_layer_download_concurrency() -> usize {
Expand Down Expand Up @@ -5517,18 +5531,18 @@ mod tests {
}

#[test]
fn prepared_image_cache_identity_includes_rootfs_layout_version() {
fn prepared_image_cache_identity_includes_rootfs_layout_and_payload_version() {
assert_eq!(
prepared_image_cache_identity("sha256:local-image"),
"sandbox-prepared-rootfs-ext4-umoci-v2:sha256:local-image"
prepared_image_cache_identity("sha256:local-image", "runtime-sha256:abc"),
"sandbox-prepared-rootfs-ext4-umoci-v2:runtime-sha256:abc:sha256:local-image"
);
}

#[test]
fn bootstrap_image_cache_identity_includes_rootfs_layout_version() {
fn bootstrap_image_cache_identity_includes_rootfs_layout_and_payload_version() {
assert_eq!(
bootstrap_image_cache_identity("sha256:bootstrap-image"),
"sandbox-bootstrap-rootfs-ext4-v2:sha256:bootstrap-image"
bootstrap_image_cache_identity("sha256:bootstrap-image", "runtime-sha256:def"),
"sandbox-bootstrap-rootfs-ext4-v2:runtime-sha256:def:sha256:bootstrap-image"
);
}

Expand All @@ -5551,7 +5565,10 @@ mod tests {
&staging_dir,
&GuestImagePayload {
image_ref: "ghcr.io/example/app:latest".to_string(),
image_identity: prepared_image_cache_identity("sha256:abc"),
image_identity: prepared_image_cache_identity(
"sha256:abc",
"runtime-sha256:payload",
),
source: GuestImagePayloadSource::RegistryOciLayout { layout_dir },
},
)
Expand Down
78 changes: 73 additions & 5 deletions crates/openshell-driver-vm/src/rootfs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,11 @@ use std::path::{Path, PathBuf};
use std::process::Command;
use std::sync::atomic::{AtomicU64, Ordering};

use sha2::{Digest, Sha256};

const SUPERVISOR: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/openshell-sandbox.zst"));
const UMOCI: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/umoci.zst"));
const GUEST_INIT_SCRIPT: &str = include_str!("../scripts/openshell-vm-sandbox-init.sh");
const ROOTFS_VARIANT_MARKER: &str = ".openshell-rootfs-variant";
const SANDBOX_GUEST_INIT_PATH: &str = "/srv/openshell-vm-sandbox-init.sh";
const SANDBOX_SUPERVISOR_PATH: &str = "/opt/openshell/bin/openshell-sandbox";
Expand All @@ -26,6 +29,18 @@ pub const fn sandbox_guest_init_path() -> &'static str {
SANDBOX_GUEST_INIT_PATH
}

/// Returns a `runtime-sha256:<hex>` identifier for the payloads embedded in
/// this binary.
///
/// The digest covers a fixed domain tag followed by each labelled payload in a
/// fixed order: the guest init script, the compressed supervisor, and the
/// compressed umoci binary. Any change to one of those inputs yields a
/// different identity string.
pub fn embedded_rootfs_payload_identity() -> String {
    // Each section is hashed as `<label>\0<bytes>`; order is significant.
    let sections: [(&[u8], &[u8]); 3] = [
        (b"init\0", GUEST_INIT_SCRIPT.as_bytes()),
        (b"supervisor\0", SUPERVISOR),
        (b"umoci\0", UMOCI),
    ];

    let mut hasher = Sha256::new();
    hasher.update(b"openshell-vm-rootfs-payload-v1\0");
    for (label, bytes) in sections.iter() {
        hasher.update(label);
        hasher.update(bytes);
    }

    let digest = hasher.finalize();
    format!("runtime-sha256:{:x}", digest)
}

pub fn prepare_sandbox_rootfs_from_image_root(
rootfs: &Path,
image_identity: &str,
Expand Down Expand Up @@ -125,6 +140,44 @@ pub fn create_ext4_image_from_dir_with_size(
Ok(())
}

/// Runs a forced, non-interactive ext4 check (`-f -p`) against `image_path`.
///
/// Every candidate path for `e2fsck`, then for `fsck.ext4`, is tried in turn.
/// The function returns `Ok(())` as soon as one of them exits with a status
/// accepted by `e2fsck_status_is_successful`.
///
/// # Errors
///
/// Returns a description of the most informative failure seen if no candidate
/// succeeds. A candidate that actually ran and failed (or could not be spawned
/// for a reason other than "not found") takes precedence over candidates that
/// were merely missing, so a real fsck failure is never masked by a later
/// "not found" entry.
pub fn repair_ext4_image(image_path: &Path) -> Result<(), String> {
    let mut last_error: Option<String> = None;
    for tool in ["e2fsck", "fsck.ext4"] {
        for candidate in e2fs_tool_candidates(tool) {
            let label = candidate.display().to_string();
            let output = Command::new(&candidate)
                .arg("-f")
                .arg("-p")
                .arg(image_path)
                .output();
            match output {
                Ok(output) if e2fsck_status_is_successful(output.status.code()) => {
                    return Ok(());
                }
                Ok(output) => {
                    last_error = Some(format!(
                        "{label} failed with status {}\nstdout: {}\nstderr: {}",
                        output.status,
                        String::from_utf8_lossy(&output.stdout),
                        String::from_utf8_lossy(&output.stderr)
                    ));
                }
                Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
                    // A missing binary is the least useful diagnostic; keep any
                    // earlier failure from a tool that actually ran.
                    if last_error.is_none() {
                        last_error = Some(format!("{label} not found"));
                    }
                }
                Err(err) => {
                    last_error = Some(format!("run {label}: {err}"));
                }
            }
        }
    }
    Err(format!(
        "failed to repair ext4 image {}: {}. Install e2fsprogs (e2fsck/fsck.ext4) and retry",
        image_path.display(),
        last_error.unwrap_or_else(|| "e2fsck not found".to_string())
    ))
}

pub fn clone_or_copy_sparse_file(source: &Path, dest: &Path) -> Result<(), String> {
if let Some(parent) = dest.parent() {
fs::create_dir_all(parent).map_err(|e| format!("create {}: {e}", parent.display()))?;
Expand Down Expand Up @@ -357,11 +410,8 @@ fn prepare_sandbox_rootfs(rootfs: &Path) -> Result<(), String> {
if let Some(parent) = init_path.parent() {
fs::create_dir_all(parent).map_err(|e| format!("create {}: {e}", parent.display()))?;
}
fs::write(
&init_path,
include_str!("../scripts/openshell-vm-sandbox-init.sh"),
)
.map_err(|e| format!("write {}: {e}", init_path.display()))?;
fs::write(&init_path, GUEST_INIT_SCRIPT)
.map_err(|e| format!("write {}: {e}", init_path.display()))?;
#[cfg(unix)]
{
use std::os::unix::fs::PermissionsExt as _;
Expand Down Expand Up @@ -429,6 +479,13 @@ fn rootfs_image_size_bytes(source: &Path) -> Result<u64, String> {
Ok(round_up_to_mib(size))
}

/// Reports whether an `e2fsck` exit code leaves an offline image usable.
///
/// The exit status is a bitmask: bit 0 means errors were corrected and bit 1
/// asks for a reboot (only meaningful for mounted filesystems). Any value
/// using only those two bits, i.e. `0..=3`, is accepted. A missing code or any
/// higher bit counts as failure.
fn e2fsck_status_is_successful(code: Option<i32>) -> bool {
    matches!(code, Some(0..=3))
}

fn ext4_image_min_size_bytes(source: &Path) -> Result<u64, String> {
let used = directory_size_bytes(source)?;
Ok(round_up_to_mib(used + EXT4_IMAGE_MIN_HEADROOM_BYTES))
Expand Down Expand Up @@ -1121,6 +1178,17 @@ mod tests {
assert_eq!(debugfs_quote_argument("/tmp/bad\npath"), None);
}

#[test]
fn e2fsck_status_accepts_clean_and_corrected_images() {
    // Exit codes built only from the two low bits are usable.
    let accepted = [0, 1, 2, 3];
    assert!(accepted
        .iter()
        .all(|&code| e2fsck_status_is_successful(Some(code))));

    // Any higher bit marks the run as failed.
    let rejected = [4, 8, 16, 32, 128];
    assert!(!rejected
        .iter()
        .any(|&code| e2fsck_status_is_successful(Some(code))));

    // No exit code at all is also a failure.
    assert!(!e2fsck_status_is_successful(None));
}

fn unique_temp_dir() -> PathBuf {
static COUNTER: AtomicU64 = AtomicU64::new(0);
let nanos = SystemTime::now()
Expand Down
91 changes: 77 additions & 14 deletions crates/openshell-sandbox/src/proxy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ use openshell_ocsf::{
NetworkActivityBuilder, Process, SeverityId, StatusId, Url as OcsfUrl, ocsf_emit,
};
use std::net::{IpAddr, SocketAddr};
#[cfg(target_os = "linux")]
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::atomic::{AtomicU32, Ordering};
Expand Down Expand Up @@ -1137,20 +1139,7 @@ fn resolve_owner_identity(
})?;

let ancestors = crate::procfs::collect_ancestor_binaries(owner_pid, entrypoint_pid);

for ancestor in &ancestors {
identity_cache
.verify_or_cache(ancestor)
.map_err(|e| IdentityError {
reason: format!(
"ancestor integrity check failed for {}: {e}",
ancestor.display()
),
binary: Some(bin_path.clone()),
binary_pid: Some(owner_pid),
ancestors: ancestors.clone(),
})?;
}
let ancestors = verify_existing_ancestors(ancestors, identity_cache, &bin_path, owner_pid)?;

let mut exclude = ancestors.clone();
exclude.push(bin_path.clone());
Expand All @@ -1165,6 +1154,56 @@ fn resolve_owner_identity(
})
}

/// Verifies each ancestor binary against `identity_cache`, dropping ancestors
/// whose path cannot be found on disk.
///
/// Returns the ancestors that were verified, in their original order.
///
/// # Errors
///
/// Returns an `IdentityError` if an ancestor cannot be stat'ed for a reason
/// other than "not found", or if `verify_or_cache` rejects it. The error's
/// `ancestors` field holds the entries verified before the failure.
#[cfg(target_os = "linux")]
fn verify_existing_ancestors(
    ancestors: Vec<PathBuf>,
    identity_cache: &BinaryIdentityCache,
    bin_path: &Path,
    owner_pid: u32,
) -> std::result::Result<Vec<PathBuf>, IdentityError> {
    let mut verified = Vec::with_capacity(ancestors.len());

    for ancestor in ancestors {
        // Probe the path first so a vanished ancestor is skipped, not fatal.
        if let Err(error) = std::fs::metadata(&ancestor) {
            if error.kind() == std::io::ErrorKind::NotFound {
                debug!(
                    ancestor = %ancestor.display(),
                    "Skipping missing process ancestor during identity verification"
                );
                continue;
            }

            let reason = format!(
                "ancestor integrity check failed for {}: Failed to stat {}: {error}",
                ancestor.display(),
                ancestor.display()
            );
            return Err(IdentityError {
                reason,
                binary: Some(bin_path.to_path_buf()),
                binary_pid: Some(owner_pid),
                ancestors: verified,
            });
        }

        if let Err(e) = identity_cache.verify_or_cache(&ancestor) {
            let reason = format!(
                "ancestor integrity check failed for {}: {e}",
                ancestor.display()
            );
            return Err(IdentityError {
                reason,
                binary: Some(bin_path.to_path_buf()),
                binary_pid: Some(owner_pid),
                ancestors: verified,
            });
        }

        verified.push(ancestor);
    }

    Ok(verified)
}

/// Resolve the identity of the process owning a TCP peer connection.
///
/// Walks `/proc/<entrypoint_pid>/net/tcp` to find the socket inode, locates
Expand Down Expand Up @@ -6351,6 +6390,30 @@ network_policies:
assert_eq!(resp_str[body_start..].len(), cl);
}

#[cfg(target_os = "linux")]
#[test]
fn verify_existing_ancestors_skips_missing_paths() {
    use crate::identity::BinaryIdentityCache;
    use std::io::Write;

    // A real file on disk stands in for an ancestor binary that still exists.
    let mut present_file = tempfile::NamedTempFile::new().unwrap();
    present_file.write_all(b"ancestor").unwrap();
    present_file.flush().unwrap();
    let present = present_file.path().to_path_buf();

    // Sibling path in the same directory that is expected not to exist.
    let absent = present.with_file_name("missing-ancestor");

    let identity_cache = BinaryIdentityCache::new();
    let outcome = verify_existing_ancestors(
        vec![present.clone(), absent],
        &identity_cache,
        Path::new("/usr/bin/curl"),
        123,
    );
    let verified = outcome.expect("missing ancestors should be ignored");

    // Only the ancestor that exists on disk should remain.
    assert_eq!(verified, vec![present]);
}

/// End-to-end regression for the `docker cp` hot-swap hazard that
/// motivated `binary_path()` stripping the kernel's `" (deleted)"`
/// suffix (PR #844).
Expand Down
Loading
Loading