From cb6163659f98828d7139fcfbdf8b225ee3eb8126 Mon Sep 17 00:00:00 2001
From: Joel Brewer <joel@cambriantech.com>
Date: Mon, 4 May 2026 09:50:43 -0500
Subject: [PATCH] fix(continuum-core/gpu): detect Vulkan via vulkaninfo (was
 missing entirely)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

detect_gpu() in memory_manager.rs only had Metal and CUDA branches.
Vulkan was listed as a "supported path" in the panic message + Cargo
features but never actually wired into detection. Result: every
continuum-core-vulkan build panicked at boot with "No GPU detected"
regardless of whether a Vulkan ICD was present (NVIDIA, mesa-radv,
mesa-llvmpipe, etc).

Caught live during Carl-Windows install retest of the vulkan variant
on bigmama-1 (continuum-b69f, 2026-05-04): freshly-built
continuum-core-vulkan:108bbc33d image had libvulkan1 +
mesa-vulkan-drivers + vulkan-tools installed in the runtime stage,
but the binary never asked the loader anything — it fell straight
through detect_gpu()'s if-cuda-cfg → panic.

Fix: add detect_vulkan() that mirrors detect_cuda's nvidia-smi
subprocess approach. Calls vulkaninfo --summary (already in the
runtime image via the vulkan-tools apt package), parses the first
deviceName line. Works with any ICD: NVIDIA's loader on a GPU host,
mesa-llvmpipe (software) on a no-/dev/dri runner like
ubuntu-latest CI, mesa-radv on AMD, etc.

Memory size is conservative (4 GiB) because vulkaninfo --summary
doesn't reliably report device-local heap totals across all ICDs
without pulling in `ash`. Real allocations go through the Vulkan
loader at runtime via candle/llama.cpp's vulkan backend, so this
number only seeds GpuMemoryManager's budget estimator.

Unblocks: PR #1038 (drop core variant + default to vulkan) and
#1035 (canary→main), both of which were stuck on the smoke gate
that requires a vulkan binary to actually start.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../continuum-core/src/gpu/memory_manager.rs  | 74 +++++++++++++++++++
 1 file changed, 74 insertions(+)

diff --git a/src/workers/continuum-core/src/gpu/memory_manager.rs b/src/workers/continuum-core/src/gpu/memory_manager.rs
index 891e1d2ed..f184afee6 100644
--- a/src/workers/continuum-core/src/gpu/memory_manager.rs
+++ b/src/workers/continuum-core/src/gpu/memory_manager.rs
@@ -750,6 +750,24 @@ fn detect_gpu() -> (u64, String) {
         }
     }
 
+    // Try Vulkan. Until 2026-05-04 detect_gpu() had no vulkan branch even
+    // though `vulkan` was listed as a supported path in the panic message
+    // and Cargo features. Result: continuum-core-vulkan binary panicked at
+    // boot on every host because the loader was never queried, regardless
+    // of whether a Vulkan ICD was present (NVIDIA, mesa-llvmpipe sw,
+    // mesa-radv, etc). Caught live by Carl-Windows install retest of the
+    // vulkan variant on bigmama-1 (continuum-b69f, 2026-05-04) — the
+    // image had libvulkan1 + mesa-vulkan-drivers + vulkan-tools but the
+    // binary never asked the loader. detect_vulkan() below mirrors the
+    // detect_cuda() subprocess shape, parsing `vulkaninfo --summary`
+    // (already in the runtime image via the vulkan-tools apt package).
+    #[cfg(feature = "vulkan")]
+    {
+        if let Some(result) = detect_vulkan() {
+            return result;
+        }
+    }
+
     // No GPU detected. Per architecture, CPU fallback is forbidden
     // (#964 series / #980 GPU-fallback audit). Hard-fail with the same
     // shape install.sh's `IC_GPU_PATH=unsupported` branch uses: name
@@ -818,6 +836,62 @@ fn detect_cuda() -> Option<(u64, String)> {
     Some((total_bytes, name))
 }
 
+/// Vulkan detection via vulkaninfo subprocess.
+///
+/// Mirrors detect_cuda's nvidia-smi approach. The vulkan-tools apt package
+/// (already in continuum-core-vulkan.Dockerfile's runtime stage) ships
+/// vulkaninfo. Parsing --summary gives us a deviceName, which is enough
+/// to satisfy the architectural rule "Vulkan loader produced a usable
+/// device" — be it NVIDIA's ICD on a GPU host, mesa-radv on AMD, or
+/// llvmpipe (mesa software ICD) on a no-/dev/dri runner like
+/// ubuntu-latest CI.
+///
+/// Memory size is conservative because vulkaninfo --summary doesn't
+/// always report device-local heap totals reliably; runtime allocations
+/// query the loader directly via candle/llama-cpp's vulkan backend
+/// anyway, so this number is only used for the budget estimator.
+#[cfg(feature = "vulkan")]
+fn detect_vulkan() -> Option<(u64, String)> {
+    use std::process::Command;
+
+    let output = Command::new("vulkaninfo").arg("--summary").output().ok()?;
+
+    if !output.status.success() {
+        return None;
+    }
+
+    let stdout = String::from_utf8(output.stdout).ok()?;
+
+    // vulkaninfo --summary format (excerpt):
+    //   Devices:
+    //   ========
+    //   GPU0:
+    //           apiVersion         = 1.3.260
+    //           driverVersion      = 0x0
+    //           vendorID           = 0x10005
+    //           deviceID           = 0x0
+    //           deviceType         = PHYSICAL_DEVICE_TYPE_CPU
+    //           deviceName         = llvmpipe (LLVM 17.0.6, 256 bits)
+    //
+    // Take the FIRST deviceName (vulkaninfo orders discrete > integrated > CPU
+    // by default on most loaders). If absent, no usable ICD.
+    let device_name = stdout
+        .lines()
+        .find(|l| l.trim_start().starts_with("deviceName"))
+        .and_then(|l| l.split('=').nth(1))
+        .map(|s| s.trim().to_string())
+        .filter(|s| !s.is_empty())?;
+
+    // Conservative VRAM budget: 4 GiB. Real allocations go through the
+    // Vulkan loader at runtime; this only seeds the GpuMemoryManager
+    // budget estimator. For a CUDA host we get exact memory.total via
+    // nvidia-smi; for Vulkan there's no equivalent single-line query
+    // that handles all ICDs uniformly without pulling in `ash`.
+    let total_bytes: u64 = 4 * 1024 * 1024 * 1024;
+
+    Some((total_bytes, device_name))
+}
+
 // detect_cpu_fallback() removed — see detect_gpu()'s panic for rationale.
 // CPU fallback is forbidden architecturally; absent GPU = absent system.