From 6444f64fbf605fb7588ce3d8b89cd6bfccf5cb5e Mon Sep 17 00:00:00 2001
From: bjoern
Date: Tue, 10 Feb 2026 18:24:11 +0100
Subject: [PATCH] Add TOPIC_DETECTION_MODEL env var for topic detection model
 override

Problem:

Topic detection requests use the same large model as the main request.
For users running local models, this adds unnecessary GPU load for a
simple classification task, and there is no way to redirect topic
detection to a lighter, faster model.

Changes implemented:

1. Configuration (src/config/index.js)
   - Added TOPIC_DETECTION_MODEL env var, defaulting to "default"
     (use the main model)
   - When set to a model name, topic detection requests use that model
     instead
   - Added to the config object, with hot reload support in
     reloadConfig()

Testing:

- TOPIC_DETECTION_MODEL=default (or unset): unchanged behavior
- TOPIC_DETECTION_MODEL=llama3.2:1b: config correctly reads the value
- Hot reload picks up changes without a restart
- npm run test:unit passes with no regressions
---
 src/config/index.js | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/config/index.js b/src/config/index.js
index 6c1a698..15e2d39 100644
--- a/src/config/index.js
+++ b/src/config/index.js
@@ -136,6 +136,10 @@ const zaiModel = process.env.ZAI_MODEL?.trim() || "GLM-4.7";
 const vertexApiKey = process.env.VERTEX_API_KEY?.trim() || process.env.GOOGLE_API_KEY?.trim() || null;
 const vertexModel = process.env.VERTEX_MODEL?.trim() || "gemini-2.0-flash";
 
+// Topic detection model override
+// Values: "default" (use main model) or a model name to redirect topic detection to a lighter model
+const topicDetectionModel = (process.env.TOPIC_DETECTION_MODEL ?? "default").trim();
+
 // Hot reload configuration
 const hotReloadEnabled = process.env.HOT_RELOAD_ENABLED !== "false"; // default true
 const hotReloadDebounceMs = Number.parseInt(process.env.HOT_RELOAD_DEBOUNCE_MS ?? "1000", 10);
@@ -596,6 +600,7 @@ var config = {
   modelProvider: {
     type: modelProvider,
     defaultModel,
+    topicDetectionModel,
     // Hybrid routing settings
     preferOllama,
     fallbackEnabled,
@@ -885,6 +890,7 @@ function reloadConfig() {
   config.modelProvider.preferOllama = process.env.PREFER_OLLAMA === "true";
   config.modelProvider.fallbackEnabled = process.env.FALLBACK_ENABLED !== "false";
   config.modelProvider.fallbackProvider = (process.env.FALLBACK_PROVIDER ?? "databricks").toLowerCase();
+  config.modelProvider.topicDetectionModel = (process.env.TOPIC_DETECTION_MODEL ?? "default").trim();
 
   // Log level
   config.logger.level = process.env.LOG_LEVEL ?? "info";
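
Usage sketch (illustrative, not part of the patch): one way a caller
could resolve the effective topic detection model from this config. It
assumes src/config/index.js exports the config object via
module.exports; the resolveTopicDetectionModel helper is a hypothetical
name, not code from this repository.

  // Hypothetical consumer of config.modelProvider.topicDetectionModel
  const config = require("./src/config");

  // "default" means "use the main model"; any other value redirects
  // topic detection to that model.
  function resolveTopicDetectionModel() {
    const { topicDetectionModel, defaultModel } = config.modelProvider;
    return topicDetectionModel === "default" ? defaultModel : topicDetectionModel;
  }

For example, with TOPIC_DETECTION_MODEL=llama3.2:1b this returns
"llama3.2:1b"; when the variable is unset or set to "default", it falls
back to defaultModel.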
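
Hot reload sketch (also illustrative, under the same export assumption,
plus the assumption that reloadConfig is exported alongside the config
object, which the diff does not show): because reloadConfig() re-reads
TOPIC_DETECTION_MODEL and mutates the live config object, an env change
takes effect without a process restart.

  // Simulate an operator changing the env var at runtime
  process.env.TOPIC_DETECTION_MODEL = "llama3.2:1b";
  config.reloadConfig(); // hypothetical export; see assumption above
  console.log(config.modelProvider.topicDetectionModel); // "llama3.2:1b"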