From a46d9c6a8cdea4d3aca9adcd2b7fa76ba697a798 Mon Sep 17 00:00:00 2001
From: bjoern
Date: Thu, 5 Feb 2026 18:18:24 +0100
Subject: [PATCH 1/2] Add OLLAMA_KEEP_ALIVE parameter support

- New OLLAMA_KEEP_ALIVE env var controls how long models stay loaded
- Accepts: -1 (permanent), 0 (immediate unload), duration strings such as
  "10m" or "24h", or a plain number of seconds
- Passed to Ollama API requests via the keep_alive parameter
---
 src/clients/databricks.js | 11 +++++++++++
 src/config/index.js       |  3 +++
 2 files changed, 14 insertions(+)

diff --git a/src/clients/databricks.js b/src/clients/databricks.js
index ebfb3fd..1fb1a4b 100644
--- a/src/clients/databricks.js
+++ b/src/clients/databricks.js
@@ -308,6 +308,17 @@ async function invokeOllama(body) {
     },
   };
 
+  // Add keep_alive if configured (controls how long the model stays loaded)
+  // Accepts: duration strings ("10m", "24h"), numbers (seconds), -1 (permanent), 0 (immediate unload)
+  if (config.ollama.keepAlive !== undefined) {
+    const keepAlive = config.ollama.keepAlive;
+    // Parse as a number if it looks like one, otherwise pass the string through
+    ollamaBody.keep_alive = /^-?\d+$/.test(keepAlive)
+      ? parseInt(keepAlive, 10)
+      : keepAlive;
+    logger.debug({ keepAlive: ollamaBody.keep_alive }, "Ollama keep_alive configured");
+  }
+
   // Inject standard tools if client didn't send any (passthrough mode)
   let toolsToSend = body.tools;
   let toolsInjected = false;
diff --git a/src/config/index.js b/src/config/index.js
index dbe3df6..f9cbad6 100644
--- a/src/config/index.js
+++ b/src/config/index.js
@@ -86,6 +86,8 @@ const azureAnthropicVersion = process.env.AZURE_ANTHROPIC_VERSION ?? "2023-06-01
 
 const ollamaEndpoint = process.env.OLLAMA_ENDPOINT ?? "http://localhost:11434";
 const ollamaModel = process.env.OLLAMA_MODEL ?? "qwen2.5-coder:7b";
 const ollamaTimeout = Number.parseInt(process.env.OLLAMA_TIMEOUT_MS ?? "120000", 10);
+// Accepts: duration strings ("10m", "24h"), numbers (seconds), -1 (permanent), 0 (immediate unload)
+const ollamaKeepAlive = process.env.OLLAMA_KEEP_ALIVE;
 const ollamaEmbeddingsEndpoint = process.env.OLLAMA_EMBEDDINGS_ENDPOINT ?? `${ollamaEndpoint}/api/embeddings`;
 const ollamaEmbeddingsModel = process.env.OLLAMA_EMBEDDINGS_MODEL ?? "nomic-embed-text";
@@ -470,6 +472,7 @@ const config = {
     endpoint: ollamaEndpoint,
     model: ollamaModel,
     timeout: Number.isNaN(ollamaTimeout) ? 120000 : ollamaTimeout,
+    keepAlive: ollamaKeepAlive,
     embeddingsEndpoint: ollamaEmbeddingsEndpoint,
     embeddingsModel: ollamaEmbeddingsModel,
   },

From f2ae3fa35e28b5935a1a1656b9b0aa043bb402fb Mon Sep 17 00:00:00 2001
From: bjoern
Date: Thu, 5 Feb 2026 18:21:13 +0100
Subject: [PATCH 2/2] Add Ollama startup health check

- New ollama-startup.js module polls the server for up to 60s before the
  proxy starts listening
- Checks server reachability via the /api/tags endpoint
- Verifies the configured model is available
- Preloads the model with the keep_alive setting if configured
- Server startup waits for Ollama when it is the configured provider

Note: Only triggered when MODEL_PROVIDER=ollama or PREFER_OLLAMA=true.
When Ollama is the FALLBACK_PROVIDER, the model loads on first fallback
use.
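
Example environment for a deployment that should block startup until the
model is resident (values are illustrative, not defaults introduced by
these patches):

    MODEL_PROVIDER=ollama
    OLLAMA_ENDPOINT=http://localhost:11434   # config default
    OLLAMA_MODEL=qwen2.5-coder:7b            # config default
    OLLAMA_KEEP_ALIVE=-1                     # from PATCH 1/2: never unload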
---
 src/clients/ollama-startup.js | 120 ++++++++++++++++++++++++++++++++++
 src/server.js                 |  10 ++-
 2 files changed, 129 insertions(+), 1 deletion(-)
 create mode 100644 src/clients/ollama-startup.js

diff --git a/src/clients/ollama-startup.js b/src/clients/ollama-startup.js
new file mode 100644
index 0000000..4e3565f
--- /dev/null
+++ b/src/clients/ollama-startup.js
@@ -0,0 +1,120 @@
+const config = require("../config");
+const logger = require("../logger");
+
+const POLL_INTERVAL_MS = 5000; // 5 seconds
+const MAX_WAIT_MS = 60000; // 60 seconds
+
+/**
+ * Wait for the Ollama server to be ready and the model to be loaded.
+ * Only runs when Ollama is the configured provider.
+ *
+ * @returns {Promise<boolean>} true if Ollama is ready, false on timeout
+ */
+async function waitForOllama() {
+  const endpoint = config.ollama?.endpoint;
+  const model = config.ollama?.model;
+
+  if (!endpoint) {
+    return true;
+  }
+
+  console.log(`[Ollama] Waiting for server at ${endpoint}...`);
+  console.log(`[Ollama] Model: ${model}`);
+
+  const startTime = Date.now();
+  let attempt = 0;
+
+  while (Date.now() - startTime < MAX_WAIT_MS) {
+    attempt++;
+    const elapsed = Math.round((Date.now() - startTime) / 1000);
+
+    try {
+      // Check if the server is reachable
+      const tagsResponse = await fetch(`${endpoint}/api/tags`, {
+        signal: AbortSignal.timeout(5000)
+      });
+
+      if (!tagsResponse.ok) {
+        console.log(`[Ollama] Server not ready (${elapsed}s elapsed)...`);
+        await sleep(POLL_INTERVAL_MS);
+        continue;
+      }
+
+      const tagsData = await tagsResponse.json();
+      const models = tagsData.models || [];
+      const modelNames = models.map(m => m.name);
+
+      // Check if our model is available
+      const modelReady = modelNames.some(name =>
+        name === model || name.startsWith(`${model}:`)
+      );
+
+      if (modelReady) {
+        console.log(`[Ollama] Server ready, model "${model}" available (${elapsed}s)`);
+        logger.info({
+          endpoint,
+          model,
+          elapsedSeconds: elapsed,
+          attempts: attempt
+        }, "Ollama startup check passed");
+        return true;
+      }
+
+      // Model not yet available - try to preload it
+      console.log(`[Ollama] Server up, loading model "${model}" (${elapsed}s elapsed)...`);
+      logger.info({
+        endpoint,
+        model,
+        availableModels: modelNames
+      }, "Ollama server up, preloading model");
+
+      // Preload the model with an empty generate request
+      try {
+        const preloadBody = { model, prompt: "", stream: false };
+
+        // Use the keep_alive setting if configured
+        if (config.ollama.keepAlive !== undefined) {
+          const keepAlive = config.ollama.keepAlive;
+          preloadBody.keep_alive = /^-?\d+$/.test(keepAlive)
+            ? parseInt(keepAlive, 10)
+            : keepAlive;
+        }
+
+        await fetch(`${endpoint}/api/generate`, {
+          method: "POST",
+          headers: { "Content-Type": "application/json" },
+          body: JSON.stringify(preloadBody),
+          signal: AbortSignal.timeout(30000)
+        });
+      } catch (preloadErr) {
+        // Ignore preload errors; we'll check again on the next iteration
+        logger.debug({ error: preloadErr.message }, "Ollama model preload request failed (will retry)");
+      }
+
+    } catch (err) {
+      console.log(`[Ollama] Waiting for server (${elapsed}s elapsed)...`);
+      logger.debug({
+        error: err.message,
+        attempt,
+        elapsed
+      }, "Ollama server not yet reachable");
+    }
+
+    await sleep(POLL_INTERVAL_MS);
+  }
+
+  console.error(`[Ollama] Timeout after 60s - server or model not ready`);
+  console.error(`[Ollama] Continuing startup, but requests may fail`);
+  logger.warn({
+    endpoint,
+    model,
+    maxWaitMs: MAX_WAIT_MS
+  }, "Ollama startup check timed out - continuing anyway");
+  return false;
+}
+
+function sleep(ms) {
+  return new Promise(resolve => setTimeout(resolve, ms));
+}
+
+module.exports = { waitForOllama };
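
The model check above matches against the "name" field of the /api/tags
response; a trimmed, illustrative response (all other fields omitted)
looks like:

    {
      "models": [
        { "name": "qwen2.5-coder:7b" },
        { "name": "nomic-embed-text:latest" }
      ]
    }

So OLLAMA_MODEL=qwen2.5-coder matches "qwen2.5-coder:7b" via the
startsWith(`${model}:`) prefix test, while a fully tagged value must
match verbatim.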
diff --git a/src/server.js b/src/server.js
index 25c6938..f109134 100644
--- a/src/server.js
+++ b/src/server.js
@@ -28,6 +28,7 @@ const { registerTestTools } = require("./tools/tests");
 const { registerMcpTools } = require("./tools/mcp");
 const { registerAgentTaskTool } = require("./tools/agent-task");
 const { initConfigWatcher, getConfigWatcher } = require("./config/watcher");
+const { waitForOllama } = require("./clients/ollama-startup");
 
 initialiseMcp();
 registerStubTools();
@@ -121,8 +122,15 @@ function createApp() {
   return app;
 }
 
-function start() {
+async function start() {
   const app = createApp();
+
+  // Wait for Ollama if it's the configured provider or preferred for routing
+  const provider = config.modelProvider?.type?.toLowerCase();
+  if (provider === "ollama" || config.modelProvider?.preferOllama) {
+    await waitForOllama();
+  }
+
   const server = app.listen(config.port, () => {
     console.log(`Claude→Databricks proxy listening on http://localhost:${config.port}`);
   });
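
Verification sketch (illustrative; assumes a local Ollama install and
that src/server.js is the proxy's entry point):

    # Start the proxy with Ollama as the provider; startup should poll
    # /api/tags for up to 60s and preload the model before listening.
    MODEL_PROVIDER=ollama OLLAMA_KEEP_ALIVE=24h node src/server.js

    # In another shell, confirm the model is loaded; ollama ps lists
    # loaded models together with their keep-alive expiry.
    ollama ps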