Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/clients/databricks.js
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ async function invokeOllama(body) {
: keepAlive;
logger.debug({ keepAlive: ollamaBody.keep_alive }, "Ollama keep_alive configured");
}

// Check if model supports tools FIRST (before wasteful injection)
const supportsTools = await checkOllamaToolSupport(config.ollama.model);

Expand Down
120 changes: 120 additions & 0 deletions src/clients/ollama-startup.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
const config = require("../config");
const logger = require("../logger");

const POLL_INTERVAL_MS = 5000; // 5 seconds
const MAX_WAIT_MS = 60000; // 60 seconds

/**
 * Wait for the Ollama server to be reachable and (when configured) for the
 * model to be available, polling every POLL_INTERVAL_MS up to MAX_WAIT_MS.
 * Only runs when Ollama is the configured provider.
 *
 * @returns {Promise<boolean>} true if Ollama is ready (or no endpoint is
 *   configured, in which case there is nothing to wait for); false if the
 *   wait timed out — startup continues either way.
 */
async function waitForOllama() {
  const endpoint = config.ollama?.endpoint;
  const model = config.ollama?.model;

  // No Ollama endpoint configured: nothing to wait for.
  if (!endpoint) {
    return true;
  }

  console.log(`[Ollama] Waiting for server at ${endpoint}...`);
  console.log(`[Ollama] Model: ${model}`);

  const startTime = Date.now();
  let attempt = 0;

  while (Date.now() - startTime < MAX_WAIT_MS) {
    attempt++;
    const elapsed = Math.round((Date.now() - startTime) / 1000);

    try {
      // Check if the server is reachable; bounded so a hung socket can't
      // stall the whole startup loop.
      const tagsResponse = await fetch(`${endpoint}/api/tags`, {
        signal: AbortSignal.timeout(5000)
      });

      if (!tagsResponse.ok) {
        console.log(`[Ollama] Server not ready (${elapsed}s elapsed)...`);
        await sleep(POLL_INTERVAL_MS);
        continue;
      }

      // Server is up and no model is configured: there is nothing further
      // to verify. Without this guard the model check below can never
      // succeed and we would spin until the timeout.
      if (!model) {
        console.log(`[Ollama] Server ready (${elapsed}s), no model configured`);
        logger.info({
          endpoint,
          elapsedSeconds: elapsed,
          attempts: attempt
        }, "Ollama startup check passed");
        return true;
      }

      const tagsData = await tagsResponse.json();
      const models = tagsData.models || [];
      const modelNames = models.map(m => m.name);

      // Check if our model is available. Tags are reported with an explicit
      // tag suffix (e.g. "llama3:latest"), so accept either an exact match
      // or a tag-qualified variant of the configured name.
      const modelReady = modelNames.some(name =>
        name === model || name.startsWith(`${model}:`)
      );

      if (modelReady) {
        console.log(`[Ollama] Server ready, model "${model}" available (${elapsed}s)`);
        logger.info({
          endpoint,
          model,
          elapsedSeconds: elapsed,
          attempts: attempt
        }, "Ollama startup check passed");
        return true;
      }

      // Model not yet available - try to preload it
      console.log(`[Ollama] Server up, loading model "${model}" (${elapsed}s elapsed)...`);
      logger.info({
        endpoint,
        model,
        availableModels: modelNames
      }, "Ollama server up, preloading model");

      // Preload model with an empty generate request (Ollama's documented
      // way to load a model into memory without producing output).
      try {
        const preloadBody = { model, prompt: "", stream: false };

        // Use keep_alive setting if configured. Pure integers are passed as
        // numbers (seconds / -1 for "forever"); duration strings like "5m"
        // pass through unchanged — mirrors the handling in databricks.js.
        if (config.ollama.keepAlive !== undefined) {
          const keepAlive = config.ollama.keepAlive;
          preloadBody.keep_alive = /^-?\d+$/.test(keepAlive)
            ? parseInt(keepAlive, 10)
            : keepAlive;
        }

        await fetch(`${endpoint}/api/generate`, {
          method: "POST",
          headers: { "Content-Type": "application/json" },
          body: JSON.stringify(preloadBody),
          signal: AbortSignal.timeout(30000)
        });
      } catch (preloadErr) {
        // Ignore preload errors, we'll check again on next iteration
        logger.debug({ error: preloadErr.message }, "Ollama model preload request failed (will retry)");
      }

    } catch (err) {
      // fetch rejects on network errors and AbortSignal timeouts alike;
      // both just mean "not reachable yet", so keep polling.
      console.log(`[Ollama] Waiting for server (${elapsed}s elapsed)...`);
      logger.debug({
        error: err.message,
        attempt,
        elapsed
      }, "Ollama server not yet reachable");
    }

    await sleep(POLL_INTERVAL_MS);
  }

  // Derive the reported duration from the constant so the message cannot
  // drift if MAX_WAIT_MS is changed.
  console.error(`[Ollama] Timeout after ${Math.round(MAX_WAIT_MS / 1000)}s - server or model not ready`);
  console.error(`[Ollama] Continuing startup, but requests may fail`);
  logger.warn({
    endpoint,
    model,
    maxWaitMs: MAX_WAIT_MS
  }, "Ollama startup check timed out - continuing anyway");
  return false;
}

/**
 * Pause for the given number of milliseconds.
 *
 * @param {number} ms - Delay duration in milliseconds.
 * @returns {Promise<void>} resolves once the delay has elapsed
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}

module.exports = { waitForOllama };
8 changes: 8 additions & 0 deletions src/server.js
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ const { initializeHeadroom, shutdownHeadroom, getHeadroomManager } = require("./
const { getWorkerPool, isWorkerPoolReady } = require("./workers/pool");
const lazyLoader = require("./tools/lazy-loader");
const { setLazyLoader } = require("./tools");
const { waitForOllama } = require("./clients/ollama-startup");

// Initialize MCP
initialiseMcp();
Expand Down Expand Up @@ -199,6 +200,13 @@ async function start() {
}

const app = createApp();

// Wait for Ollama if it's the configured provider or preferred for routing
const provider = config.modelProvider?.type?.toLowerCase();
if (provider === "ollama" || config.modelProvider?.preferOllama) {
await waitForOllama();
}

const server = app.listen(config.port, () => {
console.log(`Claude→Databricks proxy listening on http://localhost:${config.port}`);
});
Expand Down
Loading