diff --git a/crates/forkd-controller/src/lib.rs b/crates/forkd-controller/src/lib.rs index acb7f47..31c1789 100644 --- a/crates/forkd-controller/src/lib.rs +++ b/crates/forkd-controller/src/lib.rs @@ -52,6 +52,11 @@ pub struct DaemonConfig { /// real disk. Must have enough free space to hold one /// guest-RAM-sized file per concurrent prewarmed child. pub prewarm_scratch_dir: PathBuf, + /// Maximum concurrent BRANCH operations the daemon will admit. Each + /// BRANCH writes a full `memory.bin` (typically 256 MiB - 8 GiB), so + /// the cap bounds peak transient disk usage during fan-outs. `None` + /// falls back to [`http::DEFAULT_BRANCH_CONCURRENCY`]. + pub branch_concurrency: Option, } impl Default for DaemonConfig { @@ -65,6 +70,7 @@ impl Default for DaemonConfig { tls_cert: None, tls_key: None, prewarm_scratch_dir: PathBuf::from("/dev/shm/forkd-prewarm"), + branch_concurrency: None, } } } @@ -104,14 +110,28 @@ pub async fn run_daemon(cfg: DaemonConfig) -> Result<()> { } }; + let branch_concurrency = cfg + .branch_concurrency + .unwrap_or(http::DEFAULT_BRANCH_CONCURRENCY); + if branch_concurrency == 0 { + anyhow::bail!( + "branch_concurrency must be > 0; got 0 (use the default {} if unsure)", + http::DEFAULT_BRANCH_CONCURRENCY + ); + } + if branch_concurrency != http::DEFAULT_BRANCH_CONCURRENCY { + tracing::info!( + branch_concurrency, + default = http::DEFAULT_BRANCH_CONCURRENCY, + "branch concurrency cap overridden" + ); + } let app_state = Arc::new(AppState { registry, live_vms: Mutex::new(HashMap::new()), snapshot_root: cfg.snapshot_root.clone(), branch_in_flight: Mutex::new(std::collections::HashSet::new()), - branch_sem: std::sync::Arc::new(tokio::sync::Semaphore::new( - http::DEFAULT_BRANCH_CONCURRENCY, - )), + branch_sem: std::sync::Arc::new(tokio::sync::Semaphore::new(branch_concurrency)), prewarm_scratch_dir: cfg.prewarm_scratch_dir.clone(), }); diff --git a/crates/forkd-controller/src/main.rs b/crates/forkd-controller/src/main.rs index 4560e17..2938351 100644 --- a/crates/forkd-controller/src/main.rs +++ b/crates/forkd-controller/src/main.rs @@ -69,6 +69,13 @@ enum Cmd { default_value = "/dev/shm/forkd-prewarm" )] prewarm_scratch_dir: PathBuf, + /// Maximum concurrent BRANCH operations the daemon will admit. + /// Each BRANCH writes a full memory.bin (typically 256 MiB - 8 GiB), + /// so this cap bounds peak transient disk usage during fan-outs. + /// Tune higher on hosts with fast NVMe + small snapshots; tune lower + /// on hosts with constrained disk. Defaults to 4. + #[arg(long, env = "FORKD_BRANCH_CONCURRENCY")] + branch_concurrency: Option, }, } @@ -92,6 +99,7 @@ async fn main() -> Result<()> { tls_cert, tls_key, prewarm_scratch_dir, + branch_concurrency, } => { let defaults = DaemonConfig::default(); run_daemon(DaemonConfig { @@ -103,6 +111,7 @@ async fn main() -> Result<()> { tls_cert, tls_key, prewarm_scratch_dir, + branch_concurrency, }) .await } diff --git a/crates/forkd-controller/tests/http_integration.rs b/crates/forkd-controller/tests/http_integration.rs index d6473ed..c4eaec6 100644 --- a/crates/forkd-controller/tests/http_integration.rs +++ b/crates/forkd-controller/tests/http_integration.rs @@ -57,6 +57,7 @@ impl TestDaemon { tls_cert: None, tls_key: None, prewarm_scratch_dir: td.path().join("prewarm"), + branch_concurrency: None, }; // We don't have a clean shutdown hook from outside (run_daemon @@ -196,6 +197,7 @@ impl TlsTestDaemon { tls_cert: Some(cert_path), tls_key: Some(key_path), prewarm_scratch_dir: td.path().join("prewarm"), + branch_concurrency: None, }; let (tx, rx) = tokio::sync::oneshot::channel::<()>();