diff --git a/crates/openshell-cli/src/main.rs b/crates/openshell-cli/src/main.rs index 2d14519fa..24b3e0a95 100644 --- a/crates/openshell-cli/src/main.rs +++ b/crates/openshell-cli/src/main.rs @@ -1310,6 +1310,31 @@ enum SandboxCommands { all: bool, }, + /// Stop a sandbox container without deleting it. + /// + /// Workspace volume, provider links, and the sandbox record survive. + /// Use `sandbox start` to bring it back live. + #[command(help_template = LEAF_HELP_TEMPLATE, next_help_heading = "FLAGS")] + Stop { + /// Sandbox names. + #[arg(required_unless_present = "all", num_args = 1.., value_name = "NAME", add = ArgValueCompleter::new(completers::complete_sandbox_names))] + names: Vec<String>, + + /// Stop all sandboxes. + #[arg(long, conflicts_with = "names")] + all: bool, + }, + + /// Start a previously-stopped sandbox container. + /// + /// Idempotent: succeeds when the sandbox is already running. + #[command(help_template = LEAF_HELP_TEMPLATE, next_help_heading = "FLAGS")] + Start { + /// Sandbox names. + #[arg(required = true, num_args = 1.., value_name = "NAME", add = ArgValueCompleter::new(completers::complete_sandbox_names))] + names: Vec<String>, + }, + /// Execute a command in a running sandbox. /// /// Runs a command inside an existing sandbox using the gRPC exec endpoint.
@@ -2672,6 +2697,12 @@ async fn main() -> Result<()> { SandboxCommands::Delete { names, all } => { run::sandbox_delete(endpoint, &names, all, &tls, &ctx.name).await?; } + SandboxCommands::Stop { names, all } => { + run::sandbox_stop(endpoint, &names, all, &tls).await?; + } + SandboxCommands::Start { names } => { + run::sandbox_start(endpoint, &names, &tls).await?; + } SandboxCommands::Connect { name, editor } => { let name = resolve_sandbox_name(name, &ctx.name)?; if let Some(editor) = editor.map(Into::into) { diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index e77a40b71..666edc387 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -48,9 +48,9 @@ use openshell_core::proto::{ ProviderProfileDiagnostic, ProviderProfileImportItem, RejectDraftChunkRequest, RevokeSshSessionRequest, RotateProviderCredentialRequest, Sandbox, SandboxPhase, SandboxPolicy, SandboxSpec, SandboxTemplate, ServiceEndpointResponse, SetClusterInferenceRequest, - SettingScope, SettingValue, TcpForwardFrame, TcpForwardInit, TcpRelayTarget, - UpdateConfigRequest, UpdateProviderRequest, WatchSandboxRequest, exec_sandbox_event, - setting_value, tcp_forward_init, + SettingScope, SettingValue, StartSandboxRequest, StopSandboxRequest, TcpForwardFrame, + TcpForwardInit, TcpRelayTarget, UpdateConfigRequest, UpdateProviderRequest, + WatchSandboxRequest, exec_sandbox_event, setting_value, tcp_forward_init, }; use openshell_core::settings::{self, SettingValueKind}; use openshell_core::{ObjectId, ObjectName}; @@ -3428,6 +3428,80 @@ pub async fn sandbox_delete( Ok(()) } +/// Stop a sandbox by name without deleting it. Workspace volume and +/// provider links survive; use `sandbox start` to bring it back live. 
+pub async fn sandbox_stop( + server: &str, + names: &[String], + all: bool, + tls: &TlsOptions, +) -> Result<()> { + let mut client = grpc_client(server, tls).await?; + + let names_to_stop: Vec<String> = if all { + let response = client + .list_sandboxes(ListSandboxesRequest { + limit: 1000, + offset: 0, + label_selector: String::new(), + }) + .await + .into_diagnostic()?; + let sandboxes = response.into_inner().sandboxes; + if sandboxes.is_empty() { + println!("No sandboxes to stop."); + return Ok(()); + } + sandboxes + .into_iter() + .map(|s| s.object_name().to_string()) + .collect() + } else { + names.to_vec() + }; + + for name in &names_to_stop { + if let Ok(stopped) = stop_forwards_for_sandbox(name) { + for port in stopped { + eprintln!( + "{} Stopped forward of port {port} for sandbox {name}", + "✓".green().bold(), + ); + } + } + + client + .stop_sandbox(StopSandboxRequest { name: name.clone() }) + .await + .into_diagnostic()?; + println!("{} Stopped sandbox {name}", "✓".green().bold()); + } + + Ok(()) +} + +/// Start a previously-stopped sandbox by name. Idempotent: succeeds when +/// the sandbox is already running. Fails if the backend resource has +/// been pruned (e.g. by manual container removal). +pub async fn sandbox_start(server: &str, names: &[String], tls: &TlsOptions) -> Result<()> { + let mut client = grpc_client(server, tls).await?; + for name in names { + let response = client + .start_sandbox(StartSandboxRequest { name: name.clone() }) + .await + .into_diagnostic()?; + if response.into_inner().started { + println!("{} Started sandbox {name}", "✓".green().bold()); + } else { + // A missing backend must fail the command (non-zero exit), as documented. + return Err(miette::miette!( + "sandbox {name} record exists but backend resource is missing" + )); + } + } + Ok(()) +} + /// Return the provider type inferred from the trailing command, if any.
fn inferred_provider_type(command: &[String]) -> Option { detect_provider_from_command(command).map(str::to_string) diff --git a/crates/openshell-cli/tests/ensure_providers_integration.rs b/crates/openshell-cli/tests/ensure_providers_integration.rs index fa2605ac2..f477eb2b3 100644 --- a/crates/openshell-cli/tests/ensure_providers_integration.rs +++ b/crates/openshell-cli/tests/ensure_providers_integration.rs @@ -25,7 +25,8 @@ use openshell_core::proto::{ ListProvidersRequest, ListProvidersResponse, ListSandboxProvidersRequest, ListSandboxProvidersResponse, ListSandboxesRequest, ListSandboxesResponse, Provider, ProviderResponse, RevokeSshSessionRequest, RevokeSshSessionResponse, SandboxResponse, - SandboxStreamEvent, ServiceStatus, SupervisorMessage, UpdateProviderRequest, + SandboxStreamEvent, ServiceStatus, StartSandboxRequest, StartSandboxResponse, + StopSandboxRequest, StopSandboxResponse, SupervisorMessage, UpdateProviderRequest, WatchSandboxRequest, }; use openshell_core::{ObjectId, ObjectName}; @@ -134,6 +135,20 @@ impl OpenShell for TestOpenShell { Ok(Response::new(DeleteSandboxResponse { deleted: true })) } + async fn stop_sandbox( + &self, + _request: tonic::Request, + ) -> Result, Status> { + Ok(Response::new(StopSandboxResponse {})) + } + + async fn start_sandbox( + &self, + _request: tonic::Request, + ) -> Result, Status> { + Ok(Response::new(StartSandboxResponse { started: true })) + } + async fn get_sandbox_config( &self, _request: tonic::Request, diff --git a/crates/openshell-cli/tests/mtls_integration.rs b/crates/openshell-cli/tests/mtls_integration.rs index fd7a18b28..500bc353e 100644 --- a/crates/openshell-cli/tests/mtls_integration.rs +++ b/crates/openshell-cli/tests/mtls_integration.rs @@ -102,6 +102,22 @@ impl OpenShell for TestOpenShell { )) } + async fn stop_sandbox( + &self, + _request: tonic::Request, + ) -> Result, Status> { + Ok(Response::new(openshell_core::proto::StopSandboxResponse {})) + } + + async fn start_sandbox( + &self, + 
_request: tonic::Request, + ) -> Result, Status> { + Ok(Response::new(openshell_core::proto::StartSandboxResponse { + started: true, + })) + } + async fn get_sandbox_config( &self, _request: tonic::Request, diff --git a/crates/openshell-cli/tests/provider_commands_integration.rs b/crates/openshell-cli/tests/provider_commands_integration.rs index cb2b3cb18..aae7b272c 100644 --- a/crates/openshell-cli/tests/provider_commands_integration.rs +++ b/crates/openshell-cli/tests/provider_commands_integration.rs @@ -25,7 +25,8 @@ use openshell_core::proto::{ ProviderCredentialRefreshStrategy, ProviderProfile, ProviderProfileCredential, ProviderResponse, RevokeSshSessionRequest, RevokeSshSessionResponse, RotateProviderCredentialRequest, RotateProviderCredentialResponse, Sandbox, SandboxResponse, - SandboxStreamEvent, ServiceStatus, SupervisorMessage, UpdateProviderRequest, + SandboxStreamEvent, ServiceStatus, StartSandboxRequest, StartSandboxResponse, + StopSandboxRequest, StopSandboxResponse, SupervisorMessage, UpdateProviderRequest, WatchSandboxRequest, }; use openshell_core::{ObjectId, ObjectName}; @@ -259,6 +260,20 @@ impl OpenShell for TestOpenShell { Ok(Response::new(DeleteSandboxResponse { deleted: true })) } + async fn stop_sandbox( + &self, + _request: tonic::Request, + ) -> Result, Status> { + Ok(Response::new(StopSandboxResponse {})) + } + + async fn start_sandbox( + &self, + _request: tonic::Request, + ) -> Result, Status> { + Ok(Response::new(StartSandboxResponse { started: true })) + } + async fn get_sandbox_config( &self, _request: tonic::Request, diff --git a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs index 2a82ae33f..ee3c3910a 100644 --- a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs +++ b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs @@ -23,7 +23,8 @@ use openshell_core::proto::{ ListSandboxProvidersResponse, 
ListSandboxesRequest, ListSandboxesResponse, PlatformEvent, ProviderResponse, RevokeSshSessionRequest, RevokeSshSessionResponse, Sandbox, SandboxCondition, SandboxLogLine, SandboxPhase, SandboxResponse, SandboxStatus, SandboxStreamEvent, - ServiceStatus, SupervisorMessage, UpdateProviderRequest, WatchSandboxRequest, + ServiceStatus, StartSandboxRequest, StartSandboxResponse, StopSandboxRequest, + StopSandboxResponse, SupervisorMessage, UpdateProviderRequest, WatchSandboxRequest, sandbox_stream_event, }; use std::collections::HashMap; @@ -154,6 +155,20 @@ impl OpenShell for TestOpenShell { Ok(Response::new(DeleteSandboxResponse { deleted: true })) } + async fn stop_sandbox( + &self, + _request: tonic::Request, + ) -> Result, Status> { + Ok(Response::new(StopSandboxResponse {})) + } + + async fn start_sandbox( + &self, + _request: tonic::Request, + ) -> Result, Status> { + Ok(Response::new(StartSandboxResponse { started: true })) + } + async fn get_sandbox_config( &self, _request: tonic::Request, diff --git a/crates/openshell-cli/tests/sandbox_name_fallback_integration.rs b/crates/openshell-cli/tests/sandbox_name_fallback_integration.rs index 44393fb2f..66593b73b 100644 --- a/crates/openshell-cli/tests/sandbox_name_fallback_integration.rs +++ b/crates/openshell-cli/tests/sandbox_name_fallback_integration.rs @@ -21,8 +21,9 @@ use openshell_core::proto::{ GetSandboxProviderEnvironmentResponse, GetSandboxRequest, HealthRequest, HealthResponse, ListProvidersRequest, ListProvidersResponse, ListSandboxProvidersRequest, ListSandboxProvidersResponse, ListSandboxesRequest, ListSandboxesResponse, ProviderResponse, - Sandbox, SandboxPolicy, SandboxResponse, SandboxStreamEvent, ServiceStatus, SupervisorMessage, - UpdateProviderRequest, WatchSandboxRequest, + Sandbox, SandboxPolicy, SandboxResponse, SandboxStreamEvent, ServiceStatus, + StartSandboxRequest, StartSandboxResponse, StopSandboxRequest, StopSandboxResponse, + SupervisorMessage, UpdateProviderRequest, 
WatchSandboxRequest, }; use std::sync::Arc; use tempfile::TempDir; @@ -119,6 +120,20 @@ impl OpenShell for TestOpenShell { Ok(Response::new(DeleteSandboxResponse { deleted: true })) } + async fn stop_sandbox( + &self, + _request: tonic::Request<StopSandboxRequest>, + ) -> Result<Response<StopSandboxResponse>, Status> { + Ok(Response::new(StopSandboxResponse {})) + } + + async fn start_sandbox( + &self, + _request: tonic::Request<StartSandboxRequest>, + ) -> Result<Response<StartSandboxResponse>, Status> { + Ok(Response::new(StartSandboxResponse { started: true })) + } + async fn get_sandbox_config( &self, request: tonic::Request, diff --git a/crates/openshell-driver-podman/src/driver.rs b/crates/openshell-driver-podman/src/driver.rs index 0d962fac6..e1bc26e88 100644 --- a/crates/openshell-driver-podman/src/driver.rs +++ b/crates/openshell-driver-podman/src/driver.rs @@ -329,6 +329,33 @@ impl PodmanComputeDriver { .map_err(ComputeDriverError::from) } + /// Start a previously-stopped sandbox container. Idempotent: returns + /// `Ok(true)` without a start request when it is already running. + /// Returns `Ok(false)` when the container is not found at inspect or + /// at start, so the caller can surface the gap. `_sandbox_id` is unused. + pub async fn resume_sandbox( + &self, + _sandbox_id: &str, + sandbox_name: &str, + ) -> Result<bool, ComputeDriverError> { + let name = validated_container_name(sandbox_name)?; + info!(sandbox_name = %sandbox_name, container = %name, "Starting sandbox container"); + + let inspect = match self.client.inspect_container(&name).await { + Ok(i) => i, + Err(PodmanApiError::NotFound(_)) => return Ok(false), + Err(e) => return Err(ComputeDriverError::from(e)), + }; + if inspect.state.running { + return Ok(true); + } + match self.client.start_container(&name).await { + Ok(()) => Ok(true), + Err(PodmanApiError::NotFound(_)) => Ok(false), + Err(e) => Err(ComputeDriverError::from(e)), + } + } + /// Delete a sandbox container and its workspace volume.
pub async fn delete_sandbox( &self, @@ -742,4 +769,130 @@ mod tests { ); let _ = std::fs::remove_file(socket_path); } + + #[tokio::test] + async fn resume_sandbox_returns_false_when_container_missing() { + let sandbox_name = "demo"; + let container_name = container::container_name(sandbox_name); + let (socket_path, request_log, handle) = spawn_podman_stub( + "resume-not-found", + vec![StubResponse::new( + StatusCode::NOT_FOUND, + r#"{"message":"gone"}"#, + )], + ); + let driver = test_driver(socket_path.clone()); + + let started = driver + .resume_sandbox("sandbox-id", sandbox_name) + .await + .expect("resume should succeed"); + + assert!(!started, "missing container should report started=false"); + handle.await.expect("stub task should finish"); + let requests = request_log + .lock() + .expect("request log lock should not be poisoned") + .clone(); + assert_eq!( + requests, + vec![format!( + "GET {}", + api_path(&format!("/libpod/containers/{container_name}/json")) + )] + ); + let _ = std::fs::remove_file(socket_path); + } + + #[tokio::test] + async fn resume_sandbox_is_noop_when_already_running() { + let sandbox_name = "demo"; + let container_name = container::container_name(sandbox_name); + let inspect_body = serde_json::json!({ + "Id": "container-id", + "Name": format!("/{container_name}"), + "State": { + "Status": "running", + "Running": true + }, + "Config": { "Labels": {} } + }) + .to_string(); + let (socket_path, request_log, handle) = spawn_podman_stub( + "resume-running", + vec![StubResponse::new(StatusCode::OK, inspect_body)], + ); + let driver = test_driver(socket_path.clone()); + + let started = driver + .resume_sandbox("sandbox-id", sandbox_name) + .await + .expect("resume should succeed"); + + assert!(started, "running container should report started=true"); + handle.await.expect("stub task should finish"); + let requests = request_log + .lock() + .expect("request log lock should not be poisoned") + .clone(); + assert_eq!( + requests, + vec![format!( 
+ "GET {}", + api_path(&format!("/libpod/containers/{container_name}/json")) + )], + "no start request should be issued for already-running container" + ); + let _ = std::fs::remove_file(socket_path); + } + + #[tokio::test] + async fn resume_sandbox_starts_stopped_container() { + let sandbox_name = "demo"; + let container_name = container::container_name(sandbox_name); + let inspect_body = serde_json::json!({ + "Id": "container-id", + "Name": format!("/{container_name}"), + "State": { + "Status": "exited", + "Running": false + }, + "Config": { "Labels": {} } + }) + .to_string(); + let (socket_path, request_log, handle) = spawn_podman_stub( + "resume-stopped", + vec![ + StubResponse::new(StatusCode::OK, inspect_body), + StubResponse::new(StatusCode::NO_CONTENT, ""), + ], + ); + let driver = test_driver(socket_path.clone()); + + let started = driver + .resume_sandbox("sandbox-id", sandbox_name) + .await + .expect("resume should succeed"); + + assert!(started, "stopped container should report started=true"); + handle.await.expect("stub task should finish"); + let requests = request_log + .lock() + .expect("request log lock should not be poisoned") + .clone(); + assert_eq!( + requests, + vec![ + format!( + "GET {}", + api_path(&format!("/libpod/containers/{container_name}/json")) + ), + format!( + "POST {}", + api_path(&format!("/libpod/containers/{container_name}/start")) + ), + ] + ); + let _ = std::fs::remove_file(socket_path); + } } diff --git a/crates/openshell-sandbox/src/grpc_client.rs b/crates/openshell-sandbox/src/grpc_client.rs index ed7ecec18..3fccb680f 100644 --- a/crates/openshell-sandbox/src/grpc_client.rs +++ b/crates/openshell-sandbox/src/grpc_client.rs @@ -375,7 +375,6 @@ impl CachedOpenShellClient { Ok(()) } - } /// Fetch the resolved inference route bundle from the server. 
diff --git a/crates/openshell-server/src/auth/authz.rs b/crates/openshell-server/src/auth/authz.rs index 832687c14..c2809c145 100644 --- a/crates/openshell-server/src/auth/authz.rs +++ b/crates/openshell-server/src/auth/authz.rs @@ -63,6 +63,8 @@ const SCOPED_METHODS: &[(&str, &str)] = &[ // sandbox:write ("/openshell.v1.OpenShell/CreateSandbox", "sandbox:write"), ("/openshell.v1.OpenShell/DeleteSandbox", "sandbox:write"), + ("/openshell.v1.OpenShell/StopSandbox", "sandbox:write"), + ("/openshell.v1.OpenShell/StartSandbox", "sandbox:write"), ("/openshell.v1.OpenShell/ExecSandbox", "sandbox:write"), ("/openshell.v1.OpenShell/ForwardTcp", "sandbox:write"), ("/openshell.v1.OpenShell/CreateSshSession", "sandbox:write"), diff --git a/crates/openshell-server/src/compute/mod.rs b/crates/openshell-server/src/compute/mod.rs index 11b55d0f4..7ad68c92a 100644 --- a/crates/openshell-server/src/compute/mod.rs +++ b/crates/openshell-server/src/compute/mod.rs @@ -19,7 +19,8 @@ use openshell_core::proto::compute::v1::{ BindVolume as DriverBindVolume, CreateSandboxRequest, DeleteSandboxRequest, DriverCondition, DriverPlatformEvent, DriverResourceRequirements, DriverSandbox, DriverSandboxSpec, DriverSandboxStatus, DriverSandboxTemplate, GetCapabilitiesRequest, GetSandboxRequest, - ListSandboxesRequest, ValidateSandboxCreateRequest, WatchSandboxesEvent, WatchSandboxesRequest, + ListSandboxesRequest, StopSandboxRequest as DriverStopSandboxRequest, + ValidateSandboxCreateRequest, WatchSandboxesEvent, WatchSandboxesRequest, compute_driver_client::ComputeDriverClient, compute_driver_server::ComputeDriver, watch_sandboxes_event, }; @@ -87,6 +88,15 @@ impl StartupResume for DockerComputeDriver { .map_err(|err| err.to_string()) } } + +#[tonic::async_trait] +impl StartupResume for PodmanComputeDriver { + async fn resume_sandbox(&self, sandbox_id: &str, sandbox_name: &str) -> Result { + Self::resume_sandbox(self, sandbox_id, sandbox_name) + .await + .map_err(|err| err.to_string()) + } +} /// 
Interval between store-vs-backend reconciliation sweeps. const RECONCILE_INTERVAL: Duration = Duration::from_secs(60); @@ -381,14 +391,17 @@ impl ComputeRuntime { tracing_log_bus: TracingLogBus, supervisor_sessions: Arc, ) -> Result { - let driver = PodmanComputeDriver::new(config) - .await - .map_err(|err| ComputeError::Message(err.to_string()))?; - let driver: SharedComputeDriver = Arc::new(PodmanDriverService::new(driver)); + let inner = Arc::new( + PodmanComputeDriver::new(config) + .await + .map_err(|err| ComputeError::Message(err.to_string()))?, + ); + let startup_resume: Arc<dyn StartupResume> = inner.clone(); + let driver: SharedComputeDriver = Arc::new(PodmanDriverService::new((*inner).clone())); Self::from_driver( driver, None, - None, + Some(startup_resume), None, store, sandbox_index, @@ -546,6 +559,60 @@ impl ComputeRuntime { Ok(deleted) } + /// Stop the compute resource backing a sandbox without removing the + /// sandbox record. Workspace volume and provider links survive. Phase + /// is left to the watch loop to update as the backend transitions. + /// Returns `NotFound` when no record exists. Driver errors map to + /// `Internal`, so idempotency on a missing backend is up to the driver. + pub async fn stop_sandbox(&self, name: &str) -> Result<(), Status> { + let sandbox = self + .store + .get_message_by_name::<Sandbox>(name) + .await + .map_err(|e| Status::internal(format!("fetch sandbox failed: {e}")))?; + + let Some(sandbox) = sandbox else { + return Err(Status::not_found("sandbox not found")); + }; + + let driver_sandbox = driver_sandbox_from_public(&sandbox); + self.driver + .stop_sandbox(Request::new(DriverStopSandboxRequest { + sandbox_id: driver_sandbox.id, + sandbox_name: driver_sandbox.name, + })) + .await + .map(|_| ()) + .map_err(|err| Status::internal(format!("stop sandbox failed: {}", err.message()))) + } + + /// Start a previously-stopped sandbox backend resource. Idempotent: + /// an already-running container returns success.
Returns + /// `Ok(false)` when the record exists but the backend resource is + /// missing — caller should surface this so the user can recreate. + pub async fn start_sandbox(&self, name: &str) -> Result<bool, Status> { + let sandbox = self + .store + .get_message_by_name::<Sandbox>(name) + .await + .map_err(|e| Status::internal(format!("fetch sandbox failed: {e}")))?; + + let Some(sandbox) = sandbox else { + return Err(Status::not_found("sandbox not found")); + }; + + let Some(resume) = &self.startup_resume else { + return Err(Status::unimplemented( + "start sandbox not supported by configured compute driver", + )); + }; + + resume + .resume_sandbox(sandbox.object_id(), sandbox.object_name()) + .await + .map_err(|err| Status::internal(format!("start sandbox failed: {err}"))) + } + pub fn spawn_watchers(&self) { let runtime = Arc::new(self.clone()); let watch_runtime = runtime.clone(); @@ -2815,6 +2882,91 @@ mod tests { ); } + #[tokio::test] + async fn stop_sandbox_returns_not_found_when_sandbox_missing() { + let runtime = test_runtime(Arc::new(TestDriver::default())).await; + let err = runtime + .stop_sandbox("does-not-exist") + .await + .expect_err("missing sandbox should fail"); + assert_eq!(err.code(), Code::NotFound); + } + + #[tokio::test] + async fn stop_sandbox_succeeds_for_existing_sandbox() { + let runtime = test_runtime(Arc::new(TestDriver::default())).await; + let sandbox = sandbox_record("sb-1", "live", SandboxPhase::Ready); + runtime.store.put_message(&sandbox).await.unwrap(); + runtime + .stop_sandbox("live") + .await + .expect("stop_sandbox should succeed"); + } + + #[tokio::test] + async fn start_sandbox_returns_not_found_when_sandbox_missing() { + let resume = Arc::new(RecordingResume::default()); + let runtime = + test_runtime_with_resume(Arc::new(TestDriver::default()), Some(resume.clone())).await; + let err = runtime + .start_sandbox("does-not-exist") + .await + .expect_err("missing sandbox should fail"); + assert_eq!(err.code(), Code::NotFound); + assert!( +
resume.calls().await.is_empty(), + "resume should not be called when sandbox record is missing" + ); + } + + #[tokio::test] + async fn start_sandbox_returns_unimplemented_without_resume_hook() { + let runtime = test_runtime(Arc::new(TestDriver::default())).await; + let sandbox = sandbox_record("sb-1", "live", SandboxPhase::Ready); + runtime.store.put_message(&sandbox).await.unwrap(); + let err = runtime + .start_sandbox("live") + .await + .expect_err("start without resume hook should fail"); + assert_eq!(err.code(), Code::Unimplemented); + } + + #[tokio::test] + async fn start_sandbox_forwards_to_resume_hook() { + let resume = Arc::new(RecordingResume::default()); + resume.set_result("sb-1", Ok(true)).await; + let runtime = + test_runtime_with_resume(Arc::new(TestDriver::default()), Some(resume.clone())).await; + let sandbox = sandbox_record("sb-1", "live", SandboxPhase::Ready); + runtime.store.put_message(&sandbox).await.unwrap(); + + let started = runtime + .start_sandbox("live") + .await + .expect("start_sandbox should succeed"); + assert!(started); + assert_eq!( + resume.calls().await, + vec![("sb-1".to_string(), "live".to_string())] + ); + } + + #[tokio::test] + async fn start_sandbox_reports_false_when_backend_resource_missing() { + let resume = Arc::new(RecordingResume::default()); + resume.set_result("sb-1", Ok(false)).await; + let runtime = + test_runtime_with_resume(Arc::new(TestDriver::default()), Some(resume.clone())).await; + let sandbox = sandbox_record("sb-1", "ghost", SandboxPhase::Ready); + runtime.store.put_message(&sandbox).await.unwrap(); + + let started = runtime + .start_sandbox("ghost") + .await + .expect("start_sandbox should succeed"); + assert!(!started); + } + #[test] fn build_platform_config_inverts_user_namespaces_to_host_users() { use prost_types::value::Kind; diff --git a/crates/openshell-server/src/grpc/mod.rs b/crates/openshell-server/src/grpc/mod.rs index 8f70c20bb..65c0a42d7 100644 --- a/crates/openshell-server/src/grpc/mod.rs +++ 
b/crates/openshell-server/src/grpc/mod.rs @@ -36,9 +36,11 @@ use openshell_core::proto::{ RejectDraftChunkResponse, RelayFrame, ReportPolicyStatusRequest, ReportPolicyStatusResponse, RevokeSshSessionRequest, RevokeSshSessionResponse, RotateProviderCredentialRequest, RotateProviderCredentialResponse, SandboxResponse, SandboxStreamEvent, ServiceEndpointResponse, - ServiceStatus, SubmitPolicyAnalysisRequest, SubmitPolicyAnalysisResponse, SupervisorMessage, - TcpForwardFrame, UndoDraftChunkRequest, UndoDraftChunkResponse, UpdateConfigRequest, - UpdateConfigResponse, UpdateProviderRequest, WatchSandboxRequest, open_shell_server::OpenShell, + ServiceStatus, StartSandboxRequest, StartSandboxResponse, StopSandboxRequest, + StopSandboxResponse, SubmitPolicyAnalysisRequest, SubmitPolicyAnalysisResponse, + SupervisorMessage, TcpForwardFrame, UndoDraftChunkRequest, UndoDraftChunkResponse, + UpdateConfigRequest, UpdateConfigResponse, UpdateProviderRequest, WatchSandboxRequest, + open_shell_server::OpenShell, }; use serde::{Deserialize, Serialize}; use std::collections::BTreeMap; @@ -261,6 +263,20 @@ impl OpenShell for OpenShellService { sandbox::handle_delete_sandbox(&self.state, request).await } + async fn stop_sandbox( + &self, + request: Request, + ) -> Result, Status> { + sandbox::handle_stop_sandbox(&self.state, request).await + } + + async fn start_sandbox( + &self, + request: Request, + ) -> Result, Status> { + sandbox::handle_start_sandbox(&self.state, request).await + } + // --- Exec --- type ExecSandboxStream = ReceiverStream>; diff --git a/crates/openshell-server/src/grpc/sandbox.rs b/crates/openshell-server/src/grpc/sandbox.rs index 4978687ed..9171cfc96 100644 --- a/crates/openshell-server/src/grpc/sandbox.rs +++ b/crates/openshell-server/src/grpc/sandbox.rs @@ -19,8 +19,9 @@ use openshell_core::proto::{ ExecSandboxInput, ExecSandboxRequest, ExecSandboxStderr, ExecSandboxStdout, GetSandboxRequest, ListSandboxProvidersRequest, ListSandboxProvidersResponse, 
ListSandboxesRequest, ListSandboxesResponse, Provider, RevokeSshSessionRequest, RevokeSshSessionResponse, - SandboxResponse, SandboxStreamEvent, SshRelayTarget, TcpForwardFrame, TcpForwardInit, - TcpRelayTarget, WatchSandboxRequest, relay_open, tcp_forward_init, + SandboxResponse, SandboxStreamEvent, SshRelayTarget, StartSandboxRequest, StartSandboxResponse, + StopSandboxRequest, StopSandboxResponse, TcpForwardFrame, TcpForwardInit, TcpRelayTarget, + WatchSandboxRequest, relay_open, tcp_forward_init, }; use openshell_core::proto::{Sandbox, SandboxPhase, SandboxTemplate, SshSession}; use openshell_core::{ObjectId, ObjectName}; @@ -403,6 +404,34 @@ pub(super) async fn handle_delete_sandbox( Ok(Response::new(DeleteSandboxResponse { deleted })) } +pub(super) async fn handle_stop_sandbox( + state: &Arc, + request: Request, +) -> Result, Status> { + let name = request.into_inner().name; + if name.is_empty() { + return Err(Status::invalid_argument("name is required")); + } + + state.compute.stop_sandbox(&name).await?; + info!(sandbox_name = %name, "StopSandbox request completed successfully"); + Ok(Response::new(StopSandboxResponse {})) +} + +pub(super) async fn handle_start_sandbox( + state: &Arc, + request: Request, +) -> Result, Status> { + let name = request.into_inner().name; + if name.is_empty() { + return Err(Status::invalid_argument("name is required")); + } + + let started = state.compute.start_sandbox(&name).await?; + info!(sandbox_name = %name, started, "StartSandbox request completed successfully"); + Ok(Response::new(StartSandboxResponse { started })) +} + async fn sandbox_by_name(state: &Arc, name: &str) -> Result { if name.is_empty() { return Err(Status::invalid_argument("sandbox_name is required")); @@ -3125,4 +3154,62 @@ mod tests { initial_version + 1 ); } + + // ---- handle_stop_sandbox / handle_start_sandbox ---- + + #[tokio::test] + async fn handle_stop_sandbox_rejects_empty_name() { + let state = test_server_state().await; + let err = 
handle_stop_sandbox( + &state, + Request::new(StopSandboxRequest { + name: String::new(), + }), + ) + .await + .expect_err("empty name should be rejected"); + assert_eq!(err.code(), tonic::Code::InvalidArgument); + } + + #[tokio::test] + async fn handle_stop_sandbox_returns_not_found_for_unknown_sandbox() { + let state = test_server_state().await; + let err = handle_stop_sandbox( + &state, + Request::new(StopSandboxRequest { + name: "ghost".to_string(), + }), + ) + .await + .expect_err("unknown sandbox should be NotFound"); + assert_eq!(err.code(), tonic::Code::NotFound); + } + + #[tokio::test] + async fn handle_start_sandbox_rejects_empty_name() { + let state = test_server_state().await; + let err = handle_start_sandbox( + &state, + Request::new(StartSandboxRequest { + name: String::new(), + }), + ) + .await + .expect_err("empty name should be rejected"); + assert_eq!(err.code(), tonic::Code::InvalidArgument); + } + + #[tokio::test] + async fn handle_start_sandbox_returns_not_found_for_unknown_sandbox() { + let state = test_server_state().await; + let err = handle_start_sandbox( + &state, + Request::new(StartSandboxRequest { + name: "ghost".to_string(), + }), + ) + .await + .expect_err("unknown sandbox should be NotFound"); + assert_eq!(err.code(), tonic::Code::NotFound); + } } diff --git a/crates/openshell-server/tests/common/mod.rs b/crates/openshell-server/tests/common/mod.rs index 3a8ecb5b3..6d912a910 100644 --- a/crates/openshell-server/tests/common/mod.rs +++ b/crates/openshell-server/tests/common/mod.rs @@ -22,7 +22,8 @@ use openshell_core::proto::{ GetSandboxProviderEnvironmentResponse, GetSandboxRequest, HealthRequest, HealthResponse, ListProvidersRequest, ListProvidersResponse, ListSandboxesRequest, ListSandboxesResponse, ProviderResponse, RelayFrame, RevokeSshSessionRequest, RevokeSshSessionResponse, - SandboxResponse, SandboxStreamEvent, ServiceStatus, SupervisorMessage, TcpForwardFrame, + SandboxResponse, SandboxStreamEvent, ServiceStatus, 
StartSandboxRequest, StartSandboxResponse, + StopSandboxRequest, StopSandboxResponse, SupervisorMessage, TcpForwardFrame, UpdateProviderRequest, WatchSandboxRequest, open_shell_server::{OpenShell, OpenShellServer}, }; @@ -110,6 +111,20 @@ impl OpenShell for TestOpenShell { Ok(Response::new(DeleteSandboxResponse { deleted: true })) } + async fn stop_sandbox( + &self, + _request: tonic::Request, + ) -> Result, Status> { + Ok(Response::new(StopSandboxResponse {})) + } + + async fn start_sandbox( + &self, + _request: tonic::Request, + ) -> Result, Status> { + Ok(Response::new(StartSandboxResponse { started: true })) + } + async fn get_sandbox_config( &self, _request: tonic::Request, diff --git a/crates/openshell-server/tests/supervisor_relay_integration.rs b/crates/openshell-server/tests/supervisor_relay_integration.rs index aae6d8cf1..a52f3b681 100644 --- a/crates/openshell-server/tests/supervisor_relay_integration.rs +++ b/crates/openshell-server/tests/supervisor_relay_integration.rs @@ -153,6 +153,18 @@ impl OpenShell for RelayGateway { ) -> Result, Status> { Err(Status::unimplemented("unused")) } + async fn stop_sandbox( + &self, + _: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + } + async fn start_sandbox( + &self, + _: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("unused")) + } async fn get_sandbox_config( &self, _: tonic::Request, diff --git a/proto/openshell.proto b/proto/openshell.proto index 21bb04d6d..9fedeccf7 100644 --- a/proto/openshell.proto +++ b/proto/openshell.proto @@ -45,6 +45,18 @@ service OpenShell { // Delete a sandbox by name. rpc DeleteSandbox(DeleteSandboxRequest) returns (DeleteSandboxResponse); + // Stop a sandbox container without deleting it. Workspace volume, + // provider links, and the sandbox record survive; the container is + // stopped via the underlying compute driver. Phase drifts from the + // backend watch stream. Use StartSandbox to bring it back live. 
+ rpc StopSandbox(StopSandboxRequest) returns (StopSandboxResponse); + + // Start a previously-stopped sandbox container. Idempotent: returns + // success when the container is already running. Fails with NotFound + // when no sandbox record exists. When the record exists but its backend + // resource has been pruned, succeeds with `started = false`. + rpc StartSandbox(StartSandboxRequest) returns (StartSandboxResponse); + // Create a short-lived SSH session for a sandbox. rpc CreateSshSession(CreateSshSessionRequest) returns (CreateSshSessionResponse); @@ -470,6 +482,29 @@ message DeleteSandboxResponse { bool deleted = 1; } +// Stop sandbox request. +message StopSandboxRequest { + // Sandbox name (canonical lookup key). + string name = 1; +} + +// Stop sandbox response. +message StopSandboxResponse {} + +// Start sandbox request. +message StartSandboxRequest { + // Sandbox name (canonical lookup key). + string name = 1; +} + +// Start sandbox response. +message StartSandboxResponse { + // True when a backend resource was found and started (or already + // running). False when the sandbox record exists but its backend + // resource is missing. + bool started = 1; +} + // Create SSH session request. message CreateSshSessionRequest { // Sandbox id.