From a46d9c6a8cdea4d3aca9adcd2b7fa76ba697a798 Mon Sep 17 00:00:00 2001
From: bjoern
Date: Thu, 5 Feb 2026 18:18:24 +0100
Subject: [PATCH] Add OLLAMA_KEEP_ALIVE parameter support

- New OLLAMA_KEEP_ALIVE env var controls how long models stay loaded
- Accepts: -1 (permanent), 0 (immediate unload), "10m", "24h", or seconds
- Empty or whitespace-only values are treated as unset
- Passed to Ollama API requests via keep_alive parameter
---
 src/clients/databricks.js | 11 +++++++++++
 src/config/index.js       |  3 +++
 2 files changed, 14 insertions(+)

diff --git a/src/clients/databricks.js b/src/clients/databricks.js
index ebfb3fd..1fb1a4b 100644
--- a/src/clients/databricks.js
+++ b/src/clients/databricks.js
@@ -308,6 +308,17 @@ async function invokeOllama(body) {
     },
   };
 
+  // Add keep_alive if configured (controls how long model stays loaded)
+  // Accepts: duration strings ("10m", "24h"), numbers (seconds), -1 (permanent), 0 (immediate unload)
+  if (config.ollama.keepAlive !== undefined) {
+    const keepAlive = config.ollama.keepAlive;
+    // Integer-looking values ("-1", "0", "300") are sent as JSON numbers; anything else is sent as a string
+    ollamaBody.keep_alive = /^-?\d+$/.test(keepAlive)
+      ? parseInt(keepAlive, 10)
+      : keepAlive;
+    logger.debug({ keepAlive: ollamaBody.keep_alive }, "Ollama keep_alive configured");
+  }
+
   // Inject standard tools if client didn't send any (passthrough mode)
   let toolsToSend = body.tools;
   let toolsInjected = false;
diff --git a/src/config/index.js b/src/config/index.js
index dbe3df6..f9cbad6 100644
--- a/src/config/index.js
+++ b/src/config/index.js
@@ -86,6 +86,8 @@ const azureAnthropicVersion = process.env.AZURE_ANTHROPIC_VERSION ?? "2023-06-01
 const ollamaEndpoint = process.env.OLLAMA_ENDPOINT ?? "http://localhost:11434";
 const ollamaModel = process.env.OLLAMA_MODEL ?? "qwen2.5-coder:7b";
 const ollamaTimeout = Number.parseInt(process.env.OLLAMA_TIMEOUT_MS ?? "120000", 10);
+// OLLAMA_KEEP_ALIVE accepts "10m"/"24h", seconds, -1 (permanent), 0 (immediate unload); a blank value counts as unset
+const ollamaKeepAlive = process.env.OLLAMA_KEEP_ALIVE?.trim() || undefined;
 const ollamaEmbeddingsEndpoint = process.env.OLLAMA_EMBEDDINGS_ENDPOINT ?? `${ollamaEndpoint}/api/embeddings`;
 const ollamaEmbeddingsModel = process.env.OLLAMA_EMBEDDINGS_MODEL ?? "nomic-embed-text";
 
@@ -470,6 +472,7 @@ const config = {
     endpoint: ollamaEndpoint,
     model: ollamaModel,
     timeout: Number.isNaN(ollamaTimeout) ? 120000 : ollamaTimeout,
+    keepAlive: ollamaKeepAlive,
     embeddingsEndpoint: ollamaEmbeddingsEndpoint,
     embeddingsModel: ollamaEmbeddingsModel,
   },