From a46d9c6a8cdea4d3aca9adcd2b7fa76ba697a798 Mon Sep 17 00:00:00 2001
From: bjoern
Date: Thu, 5 Feb 2026 18:18:24 +0100
Subject: [PATCH 1/2] Add OLLAMA_KEEP_ALIVE parameter support

- New OLLAMA_KEEP_ALIVE env var controls how long models stay loaded
- Accepts: -1 (permanent), 0 (immediate unload), duration strings such as
  "10m" or "24h", or a plain number of seconds
- Passed to Ollama API requests via the keep_alive parameter
---
 src/clients/databricks.js | 11 +++++++++++
 src/config/index.js       |  3 +++
 2 files changed, 14 insertions(+)

diff --git a/src/clients/databricks.js b/src/clients/databricks.js
index ebfb3fd..1fb1a4b 100644
--- a/src/clients/databricks.js
+++ b/src/clients/databricks.js
@@ -308,6 +308,17 @@ async function invokeOllama(body) {
     },
   };
 
+  // Add keep_alive if configured (controls how long the model stays loaded)
+  // Accepts: duration strings ("10m", "24h"), numbers (seconds), -1 (permanent), 0 (immediate unload)
+  if (config.ollama.keepAlive !== undefined) {
+    const keepAlive = config.ollama.keepAlive;
+    // Parse as a number if it looks like one, otherwise pass the string through
+    ollamaBody.keep_alive = /^-?\d+$/.test(keepAlive)
+      ? parseInt(keepAlive, 10)
+      : keepAlive;
+    logger.debug({ keepAlive: ollamaBody.keep_alive }, "Ollama keep_alive configured");
+  }
+
   // Inject standard tools if client didn't send any (passthrough mode)
   let toolsToSend = body.tools;
   let toolsInjected = false;
diff --git a/src/config/index.js b/src/config/index.js
index dbe3df6..f9cbad6 100644
--- a/src/config/index.js
+++ b/src/config/index.js
@@ -86,6 +86,8 @@ const azureAnthropicVersion = process.env.AZURE_ANTHROPIC_VERSION ?? "2023-06-01
 
 const ollamaEndpoint = process.env.OLLAMA_ENDPOINT ?? "http://localhost:11434";
 const ollamaModel = process.env.OLLAMA_MODEL ?? "qwen2.5-coder:7b";
 const ollamaTimeout = Number.parseInt(process.env.OLLAMA_TIMEOUT_MS ?? "120000", 10);
+// Accepts: duration strings ("10m", "24h"), numbers (seconds), -1 (permanent), 0 (immediate unload)
+const ollamaKeepAlive = process.env.OLLAMA_KEEP_ALIVE;
 const ollamaEmbeddingsEndpoint = process.env.OLLAMA_EMBEDDINGS_ENDPOINT ?? `${ollamaEndpoint}/api/embeddings`;
 const ollamaEmbeddingsModel = process.env.OLLAMA_EMBEDDINGS_MODEL ?? "nomic-embed-text";
@@ -470,6 +472,7 @@ const config = {
     endpoint: ollamaEndpoint,
     model: ollamaModel,
     timeout: Number.isNaN(ollamaTimeout) ? 120000 : ollamaTimeout,
+    keepAlive: ollamaKeepAlive,
     embeddingsEndpoint: ollamaEmbeddingsEndpoint,
     embeddingsModel: ollamaEmbeddingsModel,
   },

From f2ae3fa35e28b5935a1a1656b9b0aa043bb402fb Mon Sep 17 00:00:00 2001
From: bjoern
Date: Thu, 5 Feb 2026 18:21:13 +0100
Subject: [PATCH 2/2] Add Ollama startup health check

- New ollama-startup.js module polls the server for up to 60s before the
  proxy starts listening
- Checks server reachability via the /api/tags endpoint
- Verifies the configured model is available
- Preloads the model with the keep_alive setting if configured
- Server startup waits for Ollama when it is the configured provider

Note: Only triggered when MODEL_PROVIDER=ollama or PREFER_OLLAMA=true.
When Ollama is the FALLBACK_PROVIDER, the model loads on first fallback
use.
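
Example environment for a deployment that should block startup until the
model is resident (values are illustrative, not defaults introduced by
these patches):

    MODEL_PROVIDER=ollama
    OLLAMA_ENDPOINT=http://localhost:11434   # config default
    OLLAMA_MODEL=qwen2.5-coder:7b            # config default
    OLLAMA_KEEP_ALIVE=-1                     # from PATCH 1/2: never unload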
---
 src/clients/ollama-startup.js | 120 ++++++++++++++++++++++++++++++++++
 src/server.js                 |  10 ++-
 2 files changed, 129 insertions(+), 1 deletion(-)
 create mode 100644 src/clients/ollama-startup.js

diff --git a/src/clients/ollama-startup.js b/src/clients/ollama-startup.js
new file mode 100644
index 0000000..4e3565f
--- /dev/null
+++ b/src/clients/ollama-startup.js
@@ -0,0 +1,120 @@
+const config = require("../config");
+const logger = require("../logger");
+
+const POLL_INTERVAL_MS = 5000; // 5 seconds
+const MAX_WAIT_MS = 60000; // 60 seconds
+
+/**
+ * Wait for the Ollama server to be ready and the model to be loaded.
+ * Only runs when Ollama is the configured provider.
+ *
+ * @returns {Promise<boolean>} true if Ollama is ready, false on timeout
+ */
+async function waitForOllama() {
+  const endpoint = config.ollama?.endpoint;
+  const model = config.ollama?.model;
+
+  if (!endpoint) {
+    return true;
+  }
+
+  console.log(`[Ollama] Waiting for server at ${endpoint}...`);
+  console.log(`[Ollama] Model: ${model}`);
+
+  const startTime = Date.now();
+  let attempt = 0;
+
+  while (Date.now() - startTime < MAX_WAIT_MS) {
+    attempt++;
+    const elapsed = Math.round((Date.now() - startTime) / 1000);
+
+    try {
+      // Check if the server is reachable
+      const tagsResponse = await fetch(`${endpoint}/api/tags`, {
+        signal: AbortSignal.timeout(5000)
+      });
+
+      if (!tagsResponse.ok) {
+        console.log(`[Ollama] Server not ready (${elapsed}s elapsed)...`);
+        await sleep(POLL_INTERVAL_MS);
+        continue;
+      }
+
+      const tagsData = await tagsResponse.json();
+      const models = tagsData.models || [];
+      const modelNames = models.map(m => m.name);
+
+      // Check if our model is available
+      const modelReady = modelNames.some(name =>
+        name === model || name.startsWith(`${model}:`)
+      );
+
+      if (modelReady) {
+        console.log(`[Ollama] Server ready, model "${model}" available (${elapsed}s)`);
+        logger.info({
+          endpoint,
+          model,
+          elapsedSeconds: elapsed,
+          attempts: attempt
+        }, "Ollama startup check passed");
+        return true;
+      }
+
+      // Model not yet available - try to preload it
+      console.log(`[Ollama] Server up, loading model "${model}" (${elapsed}s elapsed)...`);
+      logger.info({
+        endpoint,
+        model,
+        availableModels: modelNames
+      }, "Ollama server up, preloading model");
+
+      // Preload the model with an empty generate request
+      try {
+        const preloadBody = { model, prompt: "", stream: false };
+
+        // Use the keep_alive setting if configured
+        if (config.ollama.keepAlive !== undefined) {
+          const keepAlive = config.ollama.keepAlive;
+          preloadBody.keep_alive = /^-?\d+$/.test(keepAlive)
+            ? parseInt(keepAlive, 10)
+            : keepAlive;
+        }
+
+        await fetch(`${endpoint}/api/generate`, {
+          method: "POST",
+          headers: { "Content-Type": "application/json" },
+          body: JSON.stringify(preloadBody),
+          signal: AbortSignal.timeout(30000)
+        });
+      } catch (preloadErr) {
+        // Ignore preload errors; we'll check again on the next iteration
+        logger.debug({ error: preloadErr.message }, "Ollama model preload request failed (will retry)");
+      }
+
+    } catch (err) {
+      console.log(`[Ollama] Waiting for server (${elapsed}s elapsed)...`);
+      logger.debug({
+        error: err.message,
+        attempt,
+        elapsed
+      }, "Ollama server not yet reachable");
+    }
+
+    await sleep(POLL_INTERVAL_MS);
+  }
+
+  console.error(`[Ollama] Timeout after 60s - server or model not ready`);
+  console.error(`[Ollama] Continuing startup, but requests may fail`);
+  logger.warn({
+    endpoint,
+    model,
+    maxWaitMs: MAX_WAIT_MS
+  }, "Ollama startup check timed out - continuing anyway");
+  return false;
+}
+
+function sleep(ms) {
+  return new Promise(resolve => setTimeout(resolve, ms));
+}
+
+module.exports = { waitForOllama };
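
The model check above matches against the "name" field of the /api/tags
response; a trimmed, illustrative response (all other fields omitted)
looks like:

    {
      "models": [
        { "name": "qwen2.5-coder:7b" },
        { "name": "nomic-embed-text:latest" }
      ]
    }

So OLLAMA_MODEL=qwen2.5-coder matches "qwen2.5-coder:7b" via the
startsWith(`${model}:`) prefix test, while a fully tagged value must
match verbatim.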
diff --git a/src/server.js b/src/server.js
index 25c6938..f109134 100644
--- a/src/server.js
+++ b/src/server.js
@@ -28,6 +28,7 @@ const { registerTestTools } = require("./tools/tests");
 const { registerMcpTools } = require("./tools/mcp");
 const { registerAgentTaskTool } = require("./tools/agent-task");
 const { initConfigWatcher, getConfigWatcher } = require("./config/watcher");
+const { waitForOllama } = require("./clients/ollama-startup");
 
 initialiseMcp();
 registerStubTools();
@@ -121,8 +122,15 @@ function createApp() {
   return app;
 }
 
-function start() {
+async function start() {
   const app = createApp();
+
+  // Wait for Ollama if it's the configured provider or preferred for routing
+  const provider = config.modelProvider?.type?.toLowerCase();
+  if (provider === "ollama" || config.modelProvider?.preferOllama) {
+    await waitForOllama();
+  }
+
   const server = app.listen(config.port, () => {
     console.log(`Claude→Databricks proxy listening on http://localhost:${config.port}`);
   });
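
Verification sketch (illustrative; assumes a local Ollama install and
that src/server.js is the proxy's entry point):

    # Start the proxy with Ollama as the provider; startup should poll
    # /api/tags for up to 60s and preload the model before listening.
    MODEL_PROVIDER=ollama OLLAMA_KEEP_ALIVE=24h node src/server.js

    # In another shell, confirm the model is loaded; ollama ps lists
    # loaded models together with their keep-alive expiry.
    ollama ps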