From cb6163659f98828d7139fcfbdf8b225ee3eb8126 Mon Sep 17 00:00:00 2001 From: Joel Brewer Date: Mon, 4 May 2026 09:50:43 -0500 Subject: [PATCH] fix(continuum-core/gpu): detect Vulkan via vulkaninfo (was missing entirely) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit detect_gpu() in memory_manager.rs only had Metal and CUDA branches. Vulkan was listed as a "supported path" in the panic message and in the Cargo features, but it was never actually wired into detection. Result: every continuum-core-vulkan build panicked at boot with "No GPU detected" regardless of whether a Vulkan ICD was present (NVIDIA, mesa-radv, mesa-llvmpipe, etc). Caught live during the Carl-Windows install retest of the vulkan variant on bigmama-1 (continuum-b69f, 2026-05-04): the freshly built continuum-core-vulkan:108bbc33d image had libvulkan1 + mesa-vulkan-drivers + vulkan-tools installed in the runtime stage, but the binary never asked the loader anything — it fell straight through detect_gpu()'s CUDA cfg block into the panic. Fix: add detect_vulkan(), which mirrors detect_cuda's nvidia-smi subprocess approach. It calls `vulkaninfo --summary` (already in the runtime image via the vulkan-tools apt package) and parses the first deviceName line. This works with any ICD: NVIDIA's loader on a GPU host, mesa-llvmpipe (software) on a no-/dev/dri runner like ubuntu-latest CI, mesa-radv on AMD, etc. The reported memory size is a hard-coded, conservative 4 GiB because vulkaninfo --summary doesn't reliably report device-local heap totals across all ICDs without pulling in `ash`. Real allocations go through the Vulkan loader at runtime via candle/llama.cpp's vulkan backend, so this number only seeds GpuMemoryManager's budget estimator. Unblocks: PR #1038 (drop core variant + default to vulkan) and #1035 (canary→main), both of which were stuck on the smoke gate that requires a vulkan binary to actually start. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../continuum-core/src/gpu/memory_manager.rs | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/src/workers/continuum-core/src/gpu/memory_manager.rs b/src/workers/continuum-core/src/gpu/memory_manager.rs index 891e1d2ed..f184afee6 100644 --- a/src/workers/continuum-core/src/gpu/memory_manager.rs +++ b/src/workers/continuum-core/src/gpu/memory_manager.rs @@ -750,6 +750,24 @@ fn detect_gpu() -> (u64, String) { } } + // Try Vulkan. Until 2026-05-04 detect_gpu() had no vulkan branch even + // though `vulkan` was listed as a supported path in the panic message + // and Cargo features. Result: continuum-core-vulkan binary panicked at + // boot on every host because the loader was never queried, regardless + // of whether a Vulkan ICD was present (NVIDIA, mesa-llvmpipe sw, + // mesa-radv, etc). Caught live by Carl-Windows install retest of the + // vulkan variant on bigmama-1 (continuum-b69f, 2026-05-04) — the + // image had libvulkan1 + mesa-vulkan-drivers + vulkan-tools but the + // binary never asked the loader. detect_vulkan() below mirrors the + // detect_cuda() subprocess shape, parsing `vulkaninfo --summary` + // (already in the runtime image via the vulkan-tools apt package). + #[cfg(feature = "vulkan")] + { + if let Some(result) = detect_vulkan() { + return result; + } + } + // No GPU detected. Per architecture, CPU fallback is forbidden // (#964 series / #980 GPU-fallback audit). Hard-fail with the same // shape install.sh's `IC_GPU_PATH=unsupported` branch uses: name @@ -818,6 +836,62 @@ fn detect_cuda() -> Option<(u64, String)> { Some((total_bytes, name)) } +/// Vulkan detection via vulkaninfo subprocess. +/// +/// Mirrors detect_cuda's nvidia-smi approach. The vulkan-tools apt package +/// (already in continuum-core-vulkan.Dockerfile's runtime stage) ships +/// vulkaninfo. 
Parsing --summary gives us a deviceName, which is enough +/// to satisfy the architectural rule "Vulkan loader produced a usable +/// device" — be it NVIDIA's ICD on a GPU host, mesa-radv on AMD, or +/// llvmpipe (mesa software ICD) on a no-/dev/dri runner like +/// ubuntu-latest CI. +/// +/// Memory size is conservative because vulkaninfo --summary doesn't +/// always report device-local heap totals reliably; runtime allocations +/// query the loader directly via candle/llama-cpp's vulkan backend +/// anyway, so this number is only used for the budget estimator. +#[cfg(feature = "vulkan")] +fn detect_vulkan() -> Option<(u64, String)> { + use std::process::Command; + + let output = Command::new("vulkaninfo").arg("--summary").output().ok()?; + + if !output.status.success() { + return None; + } + + let stdout = String::from_utf8(output.stdout).ok()?; + + // vulkaninfo --summary format (excerpt): + // Devices: + // ======== + // GPU0: + // apiVersion = 1.3.260 + // driverVersion = 0x0 + // vendorID = 0x10005 + // deviceID = 0x0 + // deviceType = PHYSICAL_DEVICE_TYPE_CPU + // deviceName = llvmpipe (LLVM 17.0.6, 256 bits) + // + // Take the FIRST deviceName (vulkaninfo orders discrete > integrated > CPU + // by default on most loaders). If absent, no usable ICD. + let device_name = stdout + .lines() + .find(|l| l.trim_start().starts_with("deviceName")) + .and_then(|l| l.split('=').nth(1)) + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty())?; + + // Conservative VRAM budget: 4 GiB. Real allocations go through the + // Vulkan loader at runtime; this only seeds the GpuMemoryManager + // budget estimator. For a CUDA host we get exact memory.total via + // nvidia-smi; for Vulkan there's no equivalent single-line query + // that handles all ICDs uniformly without pulling in `ash`. + let total_bytes: u64 = 4 * 1024 * 1024 * 1024; + + Some((total_bytes, device_name)) +} + // detect_cpu_fallback() removed — see detect_gpu()'s panic for rationale. 
// CPU fallback is forbidden architecturally; absent GPU = absent system.