diff --git a/src/clients/databricks.js b/src/clients/databricks.js
index d2d0d03..78b6ac4 100644
--- a/src/clients/databricks.js
+++ b/src/clients/databricks.js
@@ -319,6 +319,17 @@ async function invokeOllama(body) {
     },
   };
 
+  // Forward keep_alive only when configured (controls how long the model stays loaded).
+  // Accepts: duration strings ("10m", "24h"), numbers (seconds), -1 (permanent), 0 (immediate unload)
+  if (config.ollama.keepAlive !== undefined) {
+    const keepAlive = config.ollama.keepAlive;
+    // Integer-looking values are sent as numbers (seconds); anything else is passed through as a string.
+    ollamaBody.keep_alive = /^-?\d+$/.test(keepAlive)
+      ? Number.parseInt(keepAlive, 10)
+      : keepAlive;
+    logger.debug({ keepAlive: ollamaBody.keep_alive }, "Ollama keep_alive configured");
+  }
+
   // Check if model supports tools FIRST (before wasteful injection)
   const supportsTools = await checkOllamaToolSupport(config.ollama.model);
 
diff --git a/src/config/index.js b/src/config/index.js
index 51cc548..a8970ba 100644
--- a/src/config/index.js
+++ b/src/config/index.js
@@ -86,6 +86,8 @@ const azureAnthropicVersion = process.env.AZURE_ANTHROPIC_VERSION ?? "2023-06-01
 const ollamaEndpoint = process.env.OLLAMA_ENDPOINT ?? "http://localhost:11434";
 const ollamaModel = process.env.OLLAMA_MODEL ?? "qwen2.5-coder:7b";
 const ollamaTimeout = Number.parseInt(process.env.OLLAMA_TIMEOUT_MS ?? "120000", 10);
+// Accepts: duration strings ("10m", "24h"), numbers (seconds), -1 (permanent), 0 (immediate unload); unset, empty or blank means "not configured"
+const ollamaKeepAlive = process.env.OLLAMA_KEEP_ALIVE?.trim() || undefined;
 const ollamaEmbeddingsEndpoint = process.env.OLLAMA_EMBEDDINGS_ENDPOINT ?? `${ollamaEndpoint}/api/embeddings`;
 const ollamaEmbeddingsModel = process.env.OLLAMA_EMBEDDINGS_MODEL ?? "nomic-embed-text";
 
@@ -537,6 +539,7 @@ var config = {
     endpoint: ollamaEndpoint,
     model: ollamaModel,
     timeout: Number.isNaN(ollamaTimeout) ? 120000 : ollamaTimeout,
+    keepAlive: ollamaKeepAlive,
     embeddingsEndpoint: ollamaEmbeddingsEndpoint,
     embeddingsModel: ollamaEmbeddingsModel,
   },