From 2247bfafbc57f8cfcdf3fe8cf701009b7fd2e549 Mon Sep 17 00:00:00 2001 From: bjoern Date: Thu, 5 Feb 2026 21:05:45 +0100 Subject: [PATCH 1/7] Improve tool calling response handling - Add fallback parsing for Ollama models that return tool calls as JSON text in message content instead of using the structured tool_calls field - Return tool results directly to CLI instead of making a follow-up LLM call, reducing latency and preventing hallucinated rewrites of output - Add dedicated Glob tool returning plain text (one path per line) instead of JSON, with workspace_list accepting both 'pattern' and 'patterns' - Clarify why Glob is not aliased to workspace_list (format mismatch) --- src/clients/ollama-utils.js | 69 +++++++++++++++++++++++++++++++++++++ src/orchestrator/index.js | 47 ++++++++++++++++++++++++- src/tools/index.js | 4 +++ src/tools/indexer.js | 50 ++++++++++++++++++++++++--- 4 files changed, 165 insertions(+), 5 deletions(-) diff --git a/src/clients/ollama-utils.js b/src/clients/ollama-utils.js index 7582f05..2cd95c9 100644 --- a/src/clients/ollama-utils.js +++ b/src/clients/ollama-utils.js @@ -93,6 +93,65 @@ function convertAnthropicToolsToOllama(anthropicTools) { })); } +/** + * Extract tool call from text when LLM outputs JSON instead of using tool_calls + * Handles formats like: {"name": "Read", "parameters": {...}} + * + * @param {string} text - Text content that may contain JSON tool call + * @returns {object|null} - Tool call object in Ollama format, or null if not found + */ +function extractToolCallFromText(text) { + if (!text || typeof text !== 'string') return null; + + // Find potential JSON start - look for {"name" pattern + const startMatch = text.match(/\{\s*"name"\s*:/); + if (!startMatch) return null; + + const startIdx = startMatch.index; + + // Find matching closing brace using brace counting + let braceCount = 0; + let endIdx = -1; + for (let i = startIdx; i < text.length; i++) { + if (text[i] === '{') braceCount++; + else if (text[i] === '}') { + braceCount--; + if (braceCount === 0) { + endIdx = i + 1; + break; + } + } + } + + if (endIdx === -1) return null; + + const jsonStr = text.substring(startIdx, endIdx); + + try { + const parsed = JSON.parse(jsonStr); + + if (!parsed.name || !parsed.parameters) { + return null; + } + + logger.info({ + toolName: parsed.name, + params: parsed.parameters, + originalText: text.substring(0, 200) + }, "Extracted tool call from text content (fallback parsing)"); + + return { + function: { + name: parsed.name, + arguments: parsed.parameters + } + }; + } catch (e) { + logger.debug({ error: e.message, text: text.substring(0, 200) }, "Failed to parse extracted tool call"); + return null; + } +} + /** * Convert Ollama tool call response to Anthropic format * @@ -126,6 +185,15 @@ function convertOllamaToolCallsToAnthropic(ollamaResponse) { const toolCalls = message.tool_calls || []; const textContent = message.content || ""; + // FALLBACK: If no tool_calls but text contains JSON tool call, parse it + if (toolCalls.length === 0 && textContent) { + const extracted = extractToolCallFromText(textContent); + if (extracted) { + logger.info({ extractedTool: extracted.function?.name }, "Using fallback text parsing for tool call"); + toolCalls = [extracted]; + } + } + const contentBlocks = []; // Add text content if present @@ -217,4 +285,5 @@ module.exports = { convertOllamaToolCallsToAnthropic, buildAnthropicResponseFromOllama, modelNameSupportsTools, + extractToolCallFromText, }; diff --git a/src/orchestrator/index.js 
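To make the fallback concrete, a minimal sketch of the case extractToolCallFromText is meant to catch; the model output below is invented for illustration, and the result mirrors the Ollama-format object the function builds:

    // Some Ollama models emit the call as JSON inside the text instead of tool_calls:
    const message = {
      role: "assistant",
      content: 'Reading the file now: {"name": "Read", "parameters": {"file_path": "src/index.js"}}',
    };

    const call = extractToolCallFromText(message.content);
    // -> { function: { name: "Read", arguments: { file_path: "src/index.js" } } }
    // convertOllamaToolCallsToAnthropic() then handles it like a native tool_call.
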
b/src/orchestrator/index.js index d553b69..9596db6 100644 --- a/src/orchestrator/index.js +++ b/src/orchestrator/index.js @@ -2562,7 +2562,52 @@ IMPORTANT TOOL USAGE RULES: } } - continue; + logger.info({ + sessionId: session?.id ?? null, + step: steps, + toolCallsExecuted: toolCallsExecuted, + totalToolCallsInThisStep: toolCalls.length, + messageCount: cleanPayload.messages.length, + lastMessageRole: cleanPayload.messages[cleanPayload.messages.length - 1]?.role, + }, "Tool execution complete"); + + // Return tool results directly to CLI - no more LLM call needed + // The tool result IS the answer (e.g., file contents for Read) + if (accumulatedToolResults.length > 0) { + auditLog("=== RETURNING TOOL RESULTS DIRECTLY TO CLI ===", { + sessionId: session?.id ?? null, + toolResultCount: accumulatedToolResults.length, + toolNames: accumulatedToolResults.map(r => r.tool_name) + }); + + // Convert tool_result blocks to text blocks for CLI display + // The CLI only understands text/tool_use in responses, not tool_result + const directResponse = { + id: `msg_${Date.now()}`, + type: "message", + role: "assistant", + content: accumulatedToolResults.map(r => ({ + type: "text", + text: r.content + })), + model: requestedModel, + stop_reason: "end_turn", + usage: { input_tokens: 0, output_tokens: 0 } + }; + + return { + response: { + status: 200, + body: directResponse, + terminationReason: "tool_result_direct", + }, + steps, + durationMs: Date.now() - start, + terminationReason: "tool_result_direct", + }; + } + + continue; // Only if no tool results (shouldn't happen) } let anthropicPayload; diff --git a/src/tools/index.js b/src/tools/index.js index 11227f0..a6a6296 100644 --- a/src/tools/index.js +++ b/src/tools/index.js @@ -88,6 +88,10 @@ const TOOL_ALIASES = { runtests: "workspace_test_run", testsummary: "workspace_test_summary", testhistory: "workspace_test_history", + // Glob has dedicated tool in src/tools/indexer.js (registerGlobTool) + // - returns plain text format instead of JSON + // glob: "workspace_list", + // Glob: "workspace_list", }; function coerceString(value) { diff --git a/src/tools/indexer.js b/src/tools/indexer.js index eb0a981..2db3504 100644 --- a/src/tools/indexer.js +++ b/src/tools/indexer.js @@ -16,11 +16,13 @@ function registerWorkspaceListTool() { registerTool( "workspace_list", async ({ args = {} }) => { + // Support both 'pattern' (Glob tool) and 'patterns' (workspace_list) + const rawPatterns = args.pattern ?? args.patterns; const patterns = - typeof args.patterns === "string" - ? [args.patterns] - : Array.isArray(args.patterns) - ? args.patterns + typeof rawPatterns === "string" + ? [rawPatterns] + : Array.isArray(rawPatterns) + ? rawPatterns : undefined; const ignore = typeof args.ignore === "string" @@ -260,6 +262,45 @@ function registerSymbolReferencesTool() { ); } + +/** + * Dedicated Glob tool for Claude Code compatibility (maybe others?). + * + * Why this exists (instead of using workspace_list alias): + * - Claude Code's Glob tool returns plain text (one path per line) + * - workspace_list returns JSON with entries array + * - Models expect plain text format from Glob tool + * + * See also: TOOL_ALIASES in src/tools/index.js (commented glob entries) + */ +function registerGlobTool() { + registerTool( + "Glob", + async ({ args = {} }) => { + const pattern = args.pattern; + const basePath = args.path; + + let patterns; + if (basePath) { + const cleanPath = basePath.replace(/\/+$/, ""); + patterns = pattern ? 
[`${cleanPath}/${pattern}`] : [`${cleanPath}/**/*`]; + } else { + patterns = pattern ? [pattern] : undefined; + } + + const entries = await listWorkspaceFiles({ patterns, limit: 1000 }); + + // Plain text output: one path per line (Claude Code format) + return { + ok: true, + status: 200, + content: entries.map((e) => e.path).join("\n"), + }; + }, + { category: "indexing" }, + ); +} + function registerGotoDefinitionTool() { registerTool( "workspace_goto_definition", @@ -353,6 +394,7 @@ function registerIndexerTools() { registerSymbolSearchTool(); registerSymbolReferencesTool(); registerGotoDefinitionTool(); + registerGlobTool(); } module.exports = { From 3008d49ad7393ecd30425bcbbf11105dcf1e8517 Mon Sep 17 00:00:00 2001 From: bjoern Date: Fri, 6 Feb 2026 18:29:59 +0100 Subject: [PATCH 2/7] Improve tool calling response handling - Additional logging for tool call parsing and execution - Hard-coded shell commands for reliable tool execution - Deduplication of tool calls within a single response - Collect and return results from all called tools - Ollama uses specified Ollama model - Fix double-serialized JSON parameters from some providers --- src/api/middleware/logging.js | 96 +++++++-- src/api/middleware/request-logging.js | 18 +- src/api/router.js | 77 ++++++- src/config/index.js | 3 + src/orchestrator/index.js | 297 +++++++++++++++++++++++++- src/tools/index.js | 128 ++++++++++- src/tools/stubs.js | 29 +++ src/tools/workspace.js | 4 +- 8 files changed, 614 insertions(+), 38 deletions(-) diff --git a/src/api/middleware/logging.js b/src/api/middleware/logging.js index e53faee..dc72b03 100644 --- a/src/api/middleware/logging.js +++ b/src/api/middleware/logging.js @@ -12,26 +12,92 @@ function maskHeaders(headers = {}) { return clone; } -const loggingMiddleware = pinoHttp({ +const baseLoggingMiddleware = pinoHttp({ logger, - customProps: (req) => ({ + autoLogging: false, // Disable automatic logging so we can log manually with bodies + customProps: (req, res) => ({ sessionId: req.sessionId ?? null, }), - customLogLevel: (req, res, err) => { - if (err || res.statusCode >= 500) return "error"; - if (res.statusCode >= 400) return "warn"; - return "info"; - }, - wrapSerializers: true, - serializers: { - req(req) { - return { +}); + +// Wrapper middleware to capture and log full request/response bodies +function loggingMiddleware(req, res, next) { + const startTime = Date.now(); + + // Log request with full body immediately + logger.info({ + sessionId: req.sessionId ?? 
null, + req: { + method: req.method, + url: req.url, + headers: maskHeaders(req.headers), + }, + requestBody: req.body, // Full request body without truncation + }, 'request started'); + + // Intercept res.write for streaming responses + const originalWrite = res.write; + const chunks = []; + res.write = function (chunk) { + if (chunk) { + chunks.push(Buffer.from(chunk)); + } + return originalWrite.apply(this, arguments); + }; + + // Intercept res.send to capture the body + const originalSend = res.send; + res.send = function (body) { + res._capturedBody = body; + + // Parse if it's a JSON string for better logging + if (typeof body === 'string') { + try { + res._capturedBody = JSON.parse(body); + } catch (e) { + res._capturedBody = body; + } + } + + return originalSend.call(this, body); + }; + + // Log response when finished + res.on('finish', () => { + const responseTime = Date.now() - startTime; + + // Capture streaming body if not already captured via send() + if (chunks.length > 0 && !res._capturedBody) { + const fullBody = Buffer.concat(chunks).toString('utf8'); + res._capturedBody = { + type: 'stream', + contentType: res.getHeader('content-type'), + size: fullBody.length, + preview: fullBody.substring(0, 1000) + }; + } + + const logLevel = res.statusCode >= 500 ? 'error' : res.statusCode >= 400 ? 'warn' : 'info'; + + logger[logLevel]({ + sessionId: req.sessionId ?? null, + req: { method: req.method, url: req.url, headers: maskHeaders(req.headers), - }; - }, - }, -}); + }, + res: { + statusCode: res.statusCode, + headers: res.getHeaders ? res.getHeaders() : res.headers, + }, + requestBody: req.body, // Full request body without truncation + responseBody: res._capturedBody, // Full response body without truncation + responseTime, + }, 'request completed'); + }); + + // Still call base middleware to set up req.log + baseLoggingMiddleware(req, res, next); +} module.exports = loggingMiddleware; diff --git a/src/api/middleware/request-logging.js b/src/api/middleware/request-logging.js index 8352e1a..cf2709e 100644 --- a/src/api/middleware/request-logging.js +++ b/src/api/middleware/request-logging.js @@ -25,13 +25,14 @@ function requestLoggingMiddleware(req, res, next) { // Add to response headers res.setHeader("X-Request-ID", requestId); - // Log request start +// Log request start with full body logger.info( { requestId, method: req.method, path: req.path || req.url, query: req.query, + body: req.body, // Full request body without truncation ip: req.ip || req.socket.remoteAddress, userAgent: req.headers["user-agent"], }, @@ -43,7 +44,18 @@ function requestLoggingMiddleware(req, res, next) { res.send = function (body) { const duration = Date.now() - startTime; - // Log request completion + // Parse body if it's a string + let responseBody = body; + if (typeof body === 'string') { + try { + responseBody = JSON.parse(body); + } catch (e) { + // Keep as string if not JSON + responseBody = body; + } + } + + // Log request completion with full request and response bodies logger.info( { requestId, @@ -52,6 +64,8 @@ function requestLoggingMiddleware(req, res, next) { status: res.statusCode, duration, contentLength: res.getHeader("content-length"), + requestBody: req.body, // Full request body for reference + responseBody, // Full response body without truncation }, "Request completed" ); diff --git a/src/api/router.js b/src/api/router.js index b3ed198..057341d 100644 --- a/src/api/router.js +++ b/src/api/router.js @@ -7,6 +7,7 @@ const openaiRouter = require("./openai-router"); const 
providersRouter = require("./providers-handler"); const { getRoutingHeaders, getRoutingStats, analyzeComplexity } = require("../routing"); const { validateCwd } = require("../workspace"); +const logger = require("../logger"); const router = express.Router(); @@ -121,6 +122,13 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { const wantsStream = Boolean(req.query?.stream === 'true' || req.body?.stream); const hasTools = Array.isArray(req.body?.tools) && req.body.tools.length > 0; + logger.info({ + sessionId: req.headers['x-claude-session-id'], + wantsStream, + hasTools, + willUseStreamingPath: wantsStream || hasTools + }, "=== REQUEST ROUTING DECISION ==="); + // Analyze complexity for routing headers (Phase 3) const complexity = analyzeComplexity(req.body); const routingHeaders = getRoutingHeaders({ @@ -338,6 +346,13 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { // Legacy streaming wrapper (for tool-based requests that requested streaming) if (wantsStream && hasTools) { + logger.info({ + sessionId: req.headers['x-claude-session-id'], + pathType: 'legacy_streaming_wrapper', + wantsStream, + hasTools + }, "=== USING LEGACY STREAMING WRAPPER (TOOL-BASED WITH STREAMING) ==="); + metrics.recordStreamingStart(); res.set({ "Content-Type": "text/event-stream", @@ -359,6 +374,13 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { // Use proper Anthropic SSE format const msg = result.body; + logger.info({ + sessionId: req.headers['x-claude-session-id'], + eventType: 'message_start', + streamingWithTools: true, + hasContent: !!(msg.content && msg.content.length > 0) + }, "=== SENDING SSE MESSAGE_START ==="); + // 1. message_start res.write(`event: message_start\n`); res.write(`data: ${JSON.stringify({ @@ -419,9 +441,52 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { res.write(`event: content_block_stop\n`); res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`); + } else if (block.type === "tool_result") { + // === TOOL_RESULT SSE STREAMING - ENTERED === + logger.info({ + blockIndex: i, + blockType: block.type, + toolUseId: block.tool_use_id, + contentType: typeof block.content, + contentLength: typeof block.content === 'string' ? block.content.length : JSON.stringify(block.content).length + }, "=== SSE: STREAMING TOOL_RESULT BLOCK - START ==="); + + // Stream tool_result blocks so CLI can display actual tool output + res.write(`event: content_block_start\n`); + res.write(`data: ${JSON.stringify({ + type: "content_block_start", + index: i, + content_block: { type: "tool_result", tool_use_id: block.tool_use_id, content: "" } + })}\n\n`); + + // Stream the actual content + const content = typeof block.content === 'string' + ? block.content + : JSON.stringify(block.content); + + logger.info({ + blockIndex: i, + contentLength: content.length, + contentPreview: content.substring(0, 200) + }, "=== SSE: STREAMING TOOL_RESULT CONTENT ==="); + + res.write(`event: content_block_delta\n`); + res.write(`data: ${JSON.stringify({ + type: "content_block_delta", + index: i, + delta: { type: "tool_result_delta", content: content } + })}\n\n`); + + res.write(`event: content_block_stop\n`); + res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`); + + // === TOOL_RESULT SSE STREAMING - COMPLETED === + logger.info({ + blockIndex: i, + toolUseId: block.tool_use_id + }, "=== SSE: STREAMING TOOL_RESULT BLOCK - END ==="); } } - // 3. 
message_delta with stop_reason res.write(`event: message_delta\n`); res.write(`data: ${JSON.stringify({ @@ -454,6 +519,16 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { }); } + + // DIAGNOSTIC: Log response being sent to client + logger.info({ + status: result.status, + hasBody: !!result.body, + bodyKeys: result.body ? Object.keys(result.body) : [], + bodyType: typeof result.body, + contentLength: result.body ? JSON.stringify(result.body).length : 0 + }, "=== SENDING RESPONSE TO CLIENT ==="); + metrics.recordResponse(result.status); res.status(result.status).send(result.body); } catch (error) { diff --git a/src/config/index.js b/src/config/index.js index 51cc548..d0b82ec 100644 --- a/src/config/index.js +++ b/src/config/index.js @@ -170,6 +170,7 @@ if (!["server", "client", "passthrough"].includes(toolExecutionMode)) { "TOOL_EXECUTION_MODE must be one of: server, client, passthrough (default: server)" ); } +console.log(`[CONFIG] Tool execution mode: ${toolExecutionMode}`); // Memory system configuration (Titans-inspired long-term memory) const memoryEnabled = process.env.MEMORY_ENABLED !== "false"; // default true @@ -342,6 +343,8 @@ const databricksUrl = ? `${rawBaseUrl}${endpointPath.startsWith("/") ? "" : "/"}${endpointPath}` : null; +// Set MODEL_DEFAULT env var to use a specific model (e.g. "llama3.1" for Ollama). +// Without it, the default falls back to a Databricks Claude model regardless of MODEL_PROVIDER. const defaultModel = process.env.MODEL_DEFAULT ?? (modelProvider === "azure-anthropic" ? "claude-opus-4-5" : "databricks-claude-sonnet-4-5"); diff --git a/src/orchestrator/index.js b/src/orchestrator/index.js index 9596db6..bb2f9c5 100644 --- a/src/orchestrator/index.js +++ b/src/orchestrator/index.js @@ -919,6 +919,10 @@ function sanitizePayload(payload) { : "claude-opus-4-5"; clean.model = azureDefaultModel; } else if (providerType === "ollama") { + // Override client model with Ollama config model + const ollamaConfiguredModel = config.ollama?.model; + clean.model = ollamaConfiguredModel; + // Ollama format conversion // Check if model supports tools const { modelNameSupportsTools } = require("../clients/ollama-utils"); @@ -1024,8 +1028,15 @@ function sanitizePayload(payload) { } // Very short messages (< 20 chars) without code/technical keywords + // BUT: Common shell commands should NOT be treated as conversational + const shellCommands = /^(pwd|ls|cd|cat|echo|grep|find|ps|top|df|du|whoami|which|env)[\s\.\!\?]*$/; + if (shellCommands.test(trimmed)) { + logger.info({ matched: "shell_command", trimmed }, "Ollama conversational check - SHELL COMMAND detected, keeping tools"); + return false; // NOT conversational - needs tools! + } + if (trimmed.length < 20 && !/code|file|function|error|bug|fix|write|read|create/.test(trimmed)) { - logger.debug({ matched: "short", trimmed, length: trimmed.length }, "Ollama conversational check - matched"); + logger.warn({ matched: "short", trimmed, length: trimmed.length }, "Ollama conversational check - SHORT MESSAGE matched, DELETING TOOLS"); return true; } @@ -1035,13 +1046,15 @@ function sanitizePayload(payload) { if (isConversational) { // Strip all tools for simple conversational messages + const originalToolCount = Array.isArray(clean.tools) ? 
clean.tools.length : 0; delete clean.tools; delete clean.tool_choice; - logger.debug({ + logger.warn({ model: config.ollama?.model, - message: "Removed tools for conversational message" - }, "Ollama conversational mode"); - } else if (modelSupportsTools && Array.isArray(clean.tools) && clean.tools.length > 0) { + message: "Removed tools for conversational message", + originalToolCount, + userMessage: clean.messages?.[clean.messages.length - 1]?.content?.substring(0, 50), + }, "Ollama conversational mode - ALL TOOLS DELETED!"); } else if (modelSupportsTools && Array.isArray(clean.tools) && clean.tools.length > 0) { // Ollama performance degrades with too many tools // Limit to essential tools only const OLLAMA_ESSENTIAL_TOOLS = new Set([ @@ -1052,7 +1065,8 @@ function sanitizePayload(payload) { "Glob", "Grep", "WebSearch", - "WebFetch" + "WebFetch", + "shell", // Tool is registered as "shell" internally ]); const limitedTools = clean.tools.filter(tool => @@ -1342,6 +1356,20 @@ async function runAgentLoop({ const toolCallNames = new Map(); const toolCallHistory = new Map(); // Track tool calls to detect loops: signature -> count let loopWarningInjected = false; // Track if we've already warned about loops + const accumulatedToolResults = []; // Track tool results to include in response for CLI display + + // Log agent loop start + logger.info( + { + sessionId: session?.id ?? null, + model: requestedModel, + maxSteps: settings.maxSteps, + maxDurationMs: settings.maxDurationMs, + wantsThinking, + providerType, + }, + "Agent loop started", + ); while (steps < settings.maxSteps) { if (Date.now() - start > settings.maxDurationMs) { @@ -1761,6 +1789,15 @@ IMPORTANT TOOL USAGE RULES: }); } } + logger.info({ + messageContent: databricksResponse.json?.message?.content + ? (typeof databricksResponse.json.message.content === 'string' + ? databricksResponse.json.message.content.substring(0, 500) + : JSON.stringify(databricksResponse.json.message.content).substring(0, 500)) + : 'NO_CONTENT', + hasToolCalls: !!databricksResponse.json?.message?.tool_calls, + toolCallCount: databricksResponse.json?.message?.tool_calls?.length || 0 + }, "=== RAW LLM RESPONSE CONTENT ==="); // Handle streaming responses (pass through without buffering) if (databricksResponse.stream) { @@ -1860,11 +1897,13 @@ IMPORTANT TOOL USAGE RULES: _anthropic_block: block, })); - logger.debug( + logger.info( { sessionId: session?.id ?? null, + step: steps, contentBlocks: contentArray.length, toolCallsFound: toolCalls.length, + toolNames: toolCalls.map(tc => tc.function?.name || tc.name), stopReason: databricksResponse.json?.stop_reason, }, "Azure Anthropic response parsed", @@ -1874,6 +1913,98 @@ IMPORTANT TOOL USAGE RULES: const choice = databricksResponse.json?.choices?.[0]; message = choice?.message ?? {}; toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : []; + + // Deduplicate tool calls for OpenAI format too + if (toolCalls.length > 0) { + const uniqueToolCalls = []; + const seenSignatures = new Set(); + let duplicatesRemoved = 0; + + for (const call of toolCalls) { + const signature = getToolCallSignature(call); + if (!seenSignatures.has(signature)) { + seenSignatures.add(signature); + uniqueToolCalls.push(call); + } else { + duplicatesRemoved++; + logger.warn({ + sessionId: session?.id ?? 
null, + toolName: call.function?.name || call.name, + toolId: call.id, + signature: signature.substring(0, 32), + }, "Duplicate tool call removed (same tool with identical parameters in single response)"); + } + } + + toolCalls = uniqueToolCalls; + + logger.info( + { + sessionId: session?.id ?? null, + step: steps, + toolCallsFound: toolCalls.length, + duplicatesRemoved, + toolNames: toolCalls.map(tc => tc.function?.name || tc.name), + }, + "LLM Response: Tool calls requested (after deduplication)", + ); + } else if (providerType === "ollama") { + // Ollama format: { message: { role, content, tool_calls }, done } + message = databricksResponse.json?.message ?? {}; + toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : []; + + logger.info({ + hasMessage: !!databricksResponse.json?.message, + hasToolCalls: toolCalls.length > 0, + toolCallCount: toolCalls.length, + toolNames: toolCalls.map(tc => tc.function?.name), + done: databricksResponse.json?.done, + fullToolCalls: JSON.stringify(toolCalls), + fullResponseMessage: JSON.stringify(databricksResponse.json?.message) + }, "=== OLLAMA TOOL CALLS EXTRACTION ==="); + } else { + // OpenAI/Databricks format: { choices: [{ message: { tool_calls: [...] } }] } + const choice = databricksResponse.json?.choices?.[0]; + message = choice?.message ?? {}; + toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : []; + + // Deduplicate tool calls for OpenAI format too + if (toolCalls.length > 0) { + const uniqueToolCalls = []; + const seenSignatures = new Set(); + let duplicatesRemoved = 0; + + for (const call of toolCalls) { + const signature = getToolCallSignature(call); + + if (!seenSignatures.has(signature)) { + seenSignatures.add(signature); + uniqueToolCalls.push(call); + } else { + duplicatesRemoved++; + logger.warn({ + sessionId: session?.id ?? null, + toolName: call.function?.name || call.name, + toolId: call.id, + signature: signature.substring(0, 32), + }, "Duplicate tool call removed (same tool with identical parameters in single response)"); + } + } + + toolCalls = uniqueToolCalls; + + logger.info( + { + sessionId: session?.id ?? null, + step: steps, + toolCallsFound: toolCalls.length, + duplicatesRemoved, + toolNames: toolCalls.map(tc => tc.function?.name || tc.name), + }, + "LLM Response: Tool calls requested (after deduplication)", + ); + } + } } if (toolCalls.length > 0) { @@ -2129,6 +2260,7 @@ IMPORTANT TOOL USAGE RULES: session, cwd, requestMessages: cleanPayload.messages, + providerType, })) ); @@ -2175,6 +2307,15 @@ IMPORTANT TOOL USAGE RULES: cleanPayload.messages.push(toolMessage); + logger.info( + { + toolName: execution.name, + content: typeof toolMessage.content === 'string' + ? toolMessage.content.substring(0, 500) + : JSON.stringify(toolMessage.content).substring(0, 500) + }, "Tool result content sent to LLM", + ); + // Convert to Anthropic format for session storage let sessionToolResultContent; if (providerType === "azure-anthropic") { @@ -2362,6 +2503,7 @@ IMPORTANT TOOL USAGE RULES: session, cwd, requestMessages: cleanPayload.messages, + providerType, }); let toolMessage; @@ -2452,6 +2594,39 @@ IMPORTANT TOOL USAGE RULES: }, }); + // Accumulate tool results for CLI display + // Build a standardized tool_result block in Anthropic format + logger.info({ + sessionId: session?.id ?? null, + callId: call.id, + executionId: execution.id, + toolName: call.function?.name ?? call.name ?? 
execution.name, + executionOk: execution.ok, + contentType: typeof execution.content, + accumulatedCountBefore: accumulatedToolResults.length + }, "=== ACCUMULATING TOOL RESULT FOR CLI - START ==="); + + const toolUseId = call.id ?? execution.id; + const toolResultContent = typeof execution.content === "string" + ? execution.content + : JSON.stringify(execution.content); + accumulatedToolResults.push({ + type: "tool_result", + tool_use_id: toolUseId, + tool_name: call.function?.name ?? call.name ?? execution.name, + content: toolResultContent, + is_error: execution.ok === false, + }); + + logger.info({ + sessionId: session?.id ?? null, + toolUseId, + toolName: call.function?.name ?? call.name ?? execution.name, + contentLength: toolResultContent.length, + contentPreview: toolResultContent.substring(0, 200), + accumulatedCountAfter: accumulatedToolResults.length + }, "=== ACCUMULATING TOOL RESULT FOR CLI - END ==="); + if (execution.ok) { logger.debug( { @@ -2574,11 +2749,11 @@ IMPORTANT TOOL USAGE RULES: // Return tool results directly to CLI - no more LLM call needed // The tool result IS the answer (e.g., file contents for Read) if (accumulatedToolResults.length > 0) { - auditLog("=== RETURNING TOOL RESULTS DIRECTLY TO CLI ===", { + logger.info({ sessionId: session?.id ?? null, toolResultCount: accumulatedToolResults.length, toolNames: accumulatedToolResults.map(r => r.tool_name) - }); + }, "=== RETURNING TOOL RESULTS DIRECTLY TO CLI ==="); // Convert tool_result blocks to text blocks for CLI display // The CLI only understands text/tool_use in responses, not tool_result @@ -2607,6 +2782,15 @@ IMPORTANT TOOL USAGE RULES: }; } + logger.info({ + sessionId: session?.id ?? null, + step: steps, + toolCallsExecuted: toolCallsExecuted, + totalToolCallsInThisStep: toolCalls.length, + messageCount: cleanPayload.messages.length, + lastMessageRole: cleanPayload.messages[cleanPayload.messages.length - 1]?.role, + }, "Tool execution complete - processing next toolCall"); + continue; // Only if no tool results (shouldn't happen) } @@ -3054,6 +3238,7 @@ IMPORTANT TOOL USAGE RULES: session, cwd, requestMessages: cleanPayload.messages, + providerType, }); const toolResultMessage = createFallbackToolResultMessage(providerType, { @@ -3196,6 +3381,100 @@ IMPORTANT TOOL USAGE RULES: }, "Agent loop completed successfully", ); + + // Include accumulated tool results in the response for CLI display + // This ensures the client sees actual tool output, not just LLM summaries + logger.info({ + sessionId: session?.id ?? null, + accumulatedToolResultsCount: accumulatedToolResults.length, + hasAnthropicPayload: !!anthropicPayload, + currentContentType: anthropicPayload?.content ? (Array.isArray(anthropicPayload.content) ? 'array' : typeof anthropicPayload.content) : 'none', + currentContentLength: anthropicPayload?.content?.length || 0 + }, "=== BEFORE TOOL RESULTS INCLUSION CHECK ==="); + + if (accumulatedToolResults.length > 0) { + logger.info({ + sessionId: session?.id ?? null, + toolResultCount: accumulatedToolResults.length, + toolNames: accumulatedToolResults.map(r => r.tool_name), + toolUseIds: accumulatedToolResults.map(r => r.tool_use_id) + }, "=== ENTERING TOOL RESULTS INCLUSION BLOCK ==="); + + // Ensure content is an array + if (!Array.isArray(anthropicPayload.content)) { + logger.info({ + sessionId: session?.id ?? null, + originalContentType: typeof anthropicPayload.content, + originalContentValue: anthropicPayload.content ? 
String(anthropicPayload.content).substring(0, 100) : 'null' + }, "=== CONTENT NOT ARRAY - CONVERTING ==="); + + anthropicPayload.content = anthropicPayload.content + ? [{ type: "text", text: String(anthropicPayload.content) }] + : []; + + logger.info({ + sessionId: session?.id ?? null, + convertedContentLength: anthropicPayload.content.length + }, "=== CONTENT CONVERTED TO ARRAY ==="); + } else { + logger.info({ + sessionId: session?.id ?? null, + existingContentLength: anthropicPayload.content.length, + existingContentTypes: anthropicPayload.content.map(b => b.type) + }, "=== CONTENT ALREADY ARRAY ==="); + } + + // Prepend tool results before text content so they appear in order + const contentBeforePrepend = anthropicPayload.content.length; + anthropicPayload.content = [...accumulatedToolResults, ...anthropicPayload.content]; + + logger.info({ + sessionId: session?.id ?? null, + toolResultCount: accumulatedToolResults.length, + toolNames: accumulatedToolResults.map(r => r.tool_name), + contentBeforePrepend, + contentAfterPrepend: anthropicPayload.content.length, + finalContentTypes: anthropicPayload.content.map(b => b.type) + }, "=== TOOL RESULTS PREPENDED TO RESPONSE ==="); + + for (const block of anthropicPayload.content) { + if (block.type === "tool_result") { + logger.info({ + toolName: block.tool_name, + content: typeof block.content === 'string' + ? block.content.substring(0, 500) + : JSON.stringify(block.content).substring(0, 500) + }, "=== TOOL RESULT CONTENT SENT TO CLI ==="); + } else if (block.type === "text") { + logger.info({ + text: block.text.substring(0, 500) + }, "=== TEXT CONTENT SENT TO CLI ==="); + } + } + + } else { + logger.info({ + sessionId: session?.id ?? null + }, "=== NO TOOL RESULTS TO INCLUDE (accumulatedToolResults empty) ==="); + } + + logger.info({ + sessionId: session?.id ?? null, + finalContentLength: anthropicPayload?.content?.length || 0, + finalContentTypes: anthropicPayload?.content?.map(b => b.type) || [] + }, "=== AFTER TOOL RESULTS INCLUSION CHECK ==="); + + // DIAGNOSTIC: Log response being returned + logger.info({ + sessionId: session?.id ?? null, + status: 200, + hasBody: !!anthropicPayload, + bodyKeys: anthropicPayload ? Object.keys(anthropicPayload) : [], + contentType: anthropicPayload?.content ? (Array.isArray(anthropicPayload.content) ? 'array' : typeof anthropicPayload.content) : 'none', + contentLength: anthropicPayload?.content ? (Array.isArray(anthropicPayload.content) ? anthropicPayload.content.length : String(anthropicPayload.content).length) : 0, + stopReason: anthropicPayload?.stop_reason + }, "=== RETURNING RESPONSE TO CLIENT ==="); + return { response: { status: 200, diff --git a/src/tools/index.js b/src/tools/index.js index a6a6296..3d76777 100644 --- a/src/tools/index.js +++ b/src/tools/index.js @@ -94,6 +94,37 @@ const TOOL_ALIASES = { // Glob: "workspace_list", }; +/** + * Recursively parse string values that look like JSON arrays/objects. + * Some providers double-serialize nested parameters (e.g. questions: "[{...}]" + * instead of questions: [{...}]), which causes schema validation failures. 
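As an illustration of the double-serialization problem this helper addresses, a small sketch with invented argument values (the questions shape matches the AskUserQuestion stub added later in this patch):

    // A provider returns the nested array double-serialized as a string:
    const rawArgs = {
      questions: '[{"header":"Confirm","question":"Proceed?","options":[{"label":"Yes"}]}]',
    };

    const parsed = deepParseStringifiedJson(rawArgs);
    // parsed.questions is now a real array of question objects, so schema
    // validation and the AskUserQuestion handler both see the expected shape.
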
+ */ +function deepParseStringifiedJson(obj) { + if (typeof obj !== "object" || obj === null) return obj; + if (Array.isArray(obj)) return obj.map(deepParseStringifiedJson); + + const result = {}; + for (const [key, value] of Object.entries(obj)) { + if (typeof value === "string") { + const trimmed = value.trim(); + if ( + (trimmed.startsWith("[") && trimmed.endsWith("]")) || + (trimmed.startsWith("{") && trimmed.endsWith("}")) + ) { + try { + result[key] = deepParseStringifiedJson(JSON.parse(trimmed)); + continue; + } catch { + // Not valid JSON, keep as string + } + } + } + result[key] = + typeof value === "object" ? deepParseStringifiedJson(value) : value; + } + return result; +} + function coerceString(value) { if (value === undefined || value === null) return ""; if (typeof value === "string") return value; @@ -128,24 +159,65 @@ function normalizeHandlerResult(result) { return { ok, status, content, metadata }; } -function parseArguments(call) { +function parseArguments(call, providerType = null) { const raw = call?.function?.arguments; - if (typeof raw !== "string" || raw.trim().length === 0) return {}; + + // DEBUG: Log full call structure for diagnosis + logger.info({ + providerType, + fullCall: JSON.stringify(call), + hasFunction: !!call?.function, + functionKeys: call?.function ? Object.keys(call.function) : [], + argumentsType: typeof raw, + argumentsValue: raw, + argumentsIsNull: raw === null, + argumentsIsUndefined: raw === undefined, + }, "=== PARSING TOOL ARGUMENTS ==="); + + // Ollama sends arguments as an object, OpenAI as a JSON string + if (typeof raw === "object" && raw !== null) { + if (providerType !== "ollama") { + logger.warn({ + providerType, + expectedProvider: "ollama", + argumentsType: typeof raw, + arguments: raw + }, `Received object arguments but provider is ${providerType || "unknown"}, expected ollama format. Continuing with object.`); + } else { + logger.info({ + type: "object", + arguments: raw + }, "Tool arguments already parsed (Ollama format)"); + } + return deepParseStringifiedJson(raw); + } + + if (typeof raw !== "string" || raw.trim().length === 0) { + logger.warn({ + argumentsType: typeof raw, + argumentsEmpty: !raw || raw.trim().length === 0, + providerType + }, "Arguments not a string or empty - returning {}"); + return {}; + } + try { - return JSON.parse(raw); + const parsed = JSON.parse(raw); + logger.info({ parsed }, "Parsed JSON string arguments"); + return deepParseStringifiedJson(parsed); } catch (err) { - logger.warn({ err }, "Failed to parse tool arguments"); + logger.warn({ err, raw }, "Failed to parse tool arguments"); return {}; } } -function normaliseToolCall(call) { +function normaliseToolCall(call, providerType = null) { const name = call?.function?.name ?? call?.name; const id = call?.id ?? `${name ?? 
"tool"}_${Date.now()}`; return { id, name, - arguments: parseArguments(call), + arguments: parseArguments(call, providerType), raw: call, }; } @@ -186,7 +258,8 @@ function listTools() { } async function executeToolCall(call, context = {}) { - const normalisedCall = normaliseToolCall(call); + const providerType = context?.providerType || context?.provider || null; + const normalisedCall = normaliseToolCall(call, providerType); let registered = registry.get(normalisedCall.name); if (!registered) { const aliasTarget = TOOL_ALIASES[normalisedCall.name.toLowerCase()]; @@ -229,6 +302,10 @@ async function executeToolCall(call, context = {}) { } if (!registered) { + logger.warn({ + tool: normalisedCall.name, + id: normalisedCall.id + }, "Tool not registered"); const content = coerceString({ error: "tool_not_registered", tool: normalisedCall.name, @@ -245,6 +322,17 @@ async function executeToolCall(call, context = {}) { }; } + // Log tool invocation with full details for debugging + logger.info({ + tool: normalisedCall.name, + id: normalisedCall.id, + args: normalisedCall.arguments, + argsKeys: Object.keys(normalisedCall.arguments || {}), + rawCall: JSON.stringify(normalisedCall.raw) + }, "=== EXECUTING TOOL ==="); + + startTime = Date.now() + try { const result = await registered.handler( { @@ -260,6 +348,18 @@ async function executeToolCall(call, context = {}) { // Apply tool output truncation for token efficiency const truncatedContent = truncateToolOutput(normalisedCall.name, formatted.content); + const durationMs = Date.now() - startTime; + + // Log successful execution + logger.info({ + tool: normalisedCall.name, + id: normalisedCall.id, + status: formatted.status, + durationMs, + outputLength: truncatedContent?.length || 0, + truncated: truncatedContent !== formatted.content + }, "Tool execution completed"); + return { id: normalisedCall.id, name: normalisedCall.name, @@ -271,11 +371,20 @@ async function executeToolCall(call, context = {}) { registered: true, truncated: truncatedContent !== formatted.content, originalLength: formatted.content?.length, - truncatedLength: truncatedContent?.length + truncatedLength: truncatedContent?.length, + durationMs }, }; } catch (err) { - logger.error({ err, tool: normalisedCall.name }, "Tool execution failed"); + const durationMs = Date.now() - startTime; + + logger.error({ + err, + tool: normalisedCall.name, + id: normalisedCall.id, + durationMs + }, "Tool execution failed"); + return { id: normalisedCall.id, name: normalisedCall.name, @@ -290,6 +399,7 @@ async function executeToolCall(call, context = {}) { metadata: { registered: true, error: true, + durationMs }, error: err, }; diff --git a/src/tools/stubs.js b/src/tools/stubs.js index c026e8e..d2f1bd3 100644 --- a/src/tools/stubs.js +++ b/src/tools/stubs.js @@ -41,12 +41,41 @@ function createStubHandler(name, description) { }); } +function askUserQuestionHandler({ args }) { + let questions = args?.questions ?? []; + + if (typeof questions === "string") { + try { questions = JSON.parse(questions); } catch { questions = []; } + } + + if (!Array.isArray(questions)) questions = [questions]; + const lines = questions.map((q, i) => { + const header = q.header ? `[${q.header}] ` : ""; + const opts = (q.options ?? []) + .map((o, j) => ` ${j + 1}. 
${o.label} — ${o.description}`) + .join("\n"); + return `${header}${q.question}\n${opts}`; + }); + + return { + ok: true, + status: 200, + content: lines.join("\n\n"), + }; +} + function registerStubTools() { STUB_TOOLS.forEach((tool) => { if (!hasTool(tool.name)) { registerTool(tool.name, createStubHandler(tool.name, tool.description), tool); } }); + + if (!hasTool("AskUserQuestion")) { + registerTool("AskUserQuestion", askUserQuestionHandler, { + description: "Returns the model's question to the user as assistant output.", + }); + } } module.exports = { diff --git a/src/tools/workspace.js b/src/tools/workspace.js index 144c6c1..a020a2d 100644 --- a/src/tools/workspace.js +++ b/src/tools/workspace.js @@ -30,7 +30,7 @@ function registerWorkspaceTools() { registerTool( "fs_read", async ({ args = {} }) => { - const relativePath = validateString(args.path ?? args.file, "path"); + const relativePath = validateString(args.path ?? args.file ?? args.file_path, "path"); const encoding = normalizeEncoding(args.encoding); const content = await readFile(relativePath, encoding); return { @@ -114,7 +114,7 @@ function registerWorkspaceTools() { registerTool( "edit_patch", async ({ args = {} }, context = {}) => { - const relativePath = validateString(args.path ?? args.file, "path"); + const relativePath = validateString(args.path ?? args.file ?? args.file_path, "path"); const patch = validateString(args.patch, "patch"); const encoding = normalizeEncoding(args.encoding); From 911aeca5ce5a1dd6d1a4d2522b0aa2a3db45a1d3 Mon Sep 17 00:00:00 2001 From: bjoern Date: Sun, 8 Feb 2026 12:51:30 +0100 Subject: [PATCH 3/7] Add external file read with tilde expansion and user approval flow Enable fs_read to handle paths outside the workspace (~/... and absolute paths) via a two-phase approval flow: the tool first returns a 403 asking the LLM to get user confirmation, then reads the file on a second call with user_approved=true. Write/edit remain workspace-only. --- src/clients/standard-tools.js | 8 +++++-- src/tools/workspace.js | 39 +++++++++++++++++++++++++++++++---- src/workspace/index.js | 30 +++++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 6 deletions(-) diff --git a/src/clients/standard-tools.js b/src/clients/standard-tools.js index 51e4163..61ac791 100644 --- a/src/clients/standard-tools.js +++ b/src/clients/standard-tools.js @@ -24,13 +24,17 @@ const STANDARD_TOOLS = [ }, { name: "Read", - description: "Reads a file from the local filesystem. You can access any file directly by using this tool.", + description: "Reads a file from the local filesystem. You can access any file directly by using this tool. For files outside the workspace, the user must approve access first.", input_schema: { type: "object", properties: { file_path: { type: "string", - description: "Relative path within workspace (e.g., 'config.js', 'src/index.ts'). DO NOT use absolute paths." + description: "Path to the file. Use relative paths for workspace files (e.g., 'src/index.ts'). For files outside the workspace use absolute paths or ~ for the home directory (e.g., '~/Documents/notes.md', '/etc/hosts'). Each call reads ONE file only — do not pass multiple paths." + }, + user_approved: { + type: "boolean", + description: "Set to true ONLY after the user has explicitly approved reading a file outside the workspace. Never set this to true without asking the user first." 
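The intended call sequence for this approval flow looks roughly like the sketch below, exercised against the fs_read handler changed later in this patch; the path and ids are made up:

    // Call 1: external path, no approval yet -> handler refuses with status 403
    const denied = await executeToolCall({
      id: "call_1",
      function: { name: "fs_read", arguments: JSON.stringify({ path: "~/notes.md" }) },
    });
    // denied.content carries {"error":"external_path_requires_approval", ...}

    // Call 2: only after the user explicitly approves, retry with user_approved=true
    const approved = await executeToolCall({
      id: "call_2",
      function: {
        name: "fs_read",
        arguments: JSON.stringify({ path: "~/notes.md", user_approved: true }),
      },
    });
    // approved.content now holds the external file's contents.
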
}, limit: { type: "number", diff --git a/src/tools/workspace.js b/src/tools/workspace.js index a020a2d..9971ba3 100644 --- a/src/tools/workspace.js +++ b/src/tools/workspace.js @@ -1,8 +1,12 @@ +const path = require("path"); const { readFile, writeFile, applyFilePatch, resolveWorkspacePath, + expandTilde, + isExternalPath, + readExternalFile, fileExists, workspaceRoot, } = require("../workspace"); @@ -30,17 +34,44 @@ function registerWorkspaceTools() { registerTool( "fs_read", async ({ args = {} }) => { - const relativePath = validateString(args.path ?? args.file ?? args.file_path, "path"); + const targetPath = validateString(args.path ?? args.file ?? args.file_path, "path"); const encoding = normalizeEncoding(args.encoding); - const content = await readFile(relativePath, encoding); + + // Check if path is outside workspace + if (isExternalPath(targetPath)) { + if (args.user_approved !== true) { + const expanded = expandTilde(targetPath); + const resolved = path.resolve(expanded); + return { + ok: false, + status: 403, + content: JSON.stringify({ + error: "external_path_requires_approval", + message: `The file "${targetPath}" resolves to "${resolved}" which is outside the workspace. You MUST ask the user for permission before reading this file. If the user approves, call this tool again with the same path and set user_approved to true.`, + resolved_path: resolved, + }), + }; + } + // User approved — read external file + const { content, resolvedPath } = await readExternalFile(targetPath, encoding); + return { + ok: true, + status: 200, + content, + metadata: { path: targetPath, encoding, resolved_path: resolvedPath }, + }; + } + + // Normal workspace read (unchanged) + const content = await readFile(targetPath, encoding); return { ok: true, status: 200, content, metadata: { - path: relativePath, + path: targetPath, encoding, - resolved_path: resolveWorkspacePath(relativePath), + resolved_path: resolveWorkspacePath(targetPath), }, }; }, diff --git a/src/workspace/index.js b/src/workspace/index.js index da1a7e0..6cc058a 100644 --- a/src/workspace/index.js +++ b/src/workspace/index.js @@ -10,6 +10,33 @@ if (!fs.existsSync(workspaceRoot)) { fs.mkdirSync(workspaceRoot, { recursive: true }); } +function expandTilde(targetPath) { + if (typeof targetPath !== "string") return targetPath; + if (targetPath.startsWith("~")) { + const home = process.env.HOME || process.env.USERPROFILE; + if (home) { + return path.join(home, targetPath.slice(1)); + } + } + return targetPath; +} + +function isExternalPath(targetPath) { + const expanded = expandTilde(targetPath); + const resolved = path.resolve(workspaceRoot, expanded); + return !resolved.startsWith(workspaceRoot); +} + +async function readExternalFile(targetPath, encoding = "utf8") { + const expanded = expandTilde(targetPath); + const resolved = path.resolve(expanded); + const stats = await fsp.stat(resolved); + if (!stats.isFile()) { + throw new Error("Requested path is not a file."); + } + return { content: await fsp.readFile(resolved, { encoding }), resolvedPath: resolved }; +} + function resolveWorkspacePath(targetPath) { if (!targetPath || typeof targetPath !== "string") { throw new Error("Path must be a non-empty string."); @@ -110,6 +137,9 @@ function validateCwd(cwd) { module.exports = { workspaceRoot, resolveWorkspacePath, + expandTilde, + isExternalPath, + readExternalFile, readFile, writeFile, fileExists, From 2521ea63077c95d53ce5036f4d4161578fbd5f01 Mon Sep 17 00:00:00 2001 From: bjoern Date: Mon, 9 Feb 2026 13:08:59 +0100 Subject: [PATCH 
4/7] Remove tool_result_direct short-circuit and improve agentic loop Tool results now loop back to the model for natural language synthesis instead of being returned raw to the CLI. This fixes the bug where conversational messages (e.g. "hi") triggered tool calls and dumped raw output. Additional improvements: - Context-aware tiered compression that scales with model context window - Empty response detection with retry-then-fallback - _noToolInjection flag to prevent provider-level tool re-injection - Auto-approve external file reads in tool executor - Conversation context search in workspace_search --- src/clients/databricks.js | 20 +-- src/clients/standard-tools.js | 2 +- src/context/compression.js | 130 ++++++++++---- src/orchestrator/index.js | 309 +++++++++++++++----------------- src/providers/context-window.js | 144 +++++++++++++++ src/tools/index.js | 26 ++- src/tools/indexer.js | 66 ++++++- src/tools/workspace.js | 10 +- 8 files changed, 486 insertions(+), 221 deletions(-) create mode 100644 src/providers/context-window.js diff --git a/src/clients/databricks.js b/src/clients/databricks.js index d2d0d03..3f82be7 100644 --- a/src/clients/databricks.js +++ b/src/clients/databricks.js @@ -181,7 +181,7 @@ async function invokeDatabricks(body) { const databricksBody = { ...body }; // Inject standard tools if client didn't send any (passthrough mode) - if (!Array.isArray(databricksBody.tools) || databricksBody.tools.length === 0) { + if (!body._noToolInjection && (!Array.isArray(databricksBody.tools) || databricksBody.tools.length === 0)) { databricksBody.tools = STANDARD_TOOLS; logger.info({ injectedToolCount: STANDARD_TOOLS.length, @@ -222,7 +222,7 @@ async function invokeAzureAnthropic(body) { } // Inject standard tools if client didn't send any (passthrough mode) - if (!Array.isArray(body.tools) || body.tools.length === 0) { + if (!body._noToolInjection && (!Array.isArray(body.tools) || body.tools.length === 0)) { body.tools = STANDARD_TOOLS; logger.info({ injectedToolCount: STANDARD_TOOLS.length, @@ -331,7 +331,7 @@ async function invokeOllama(body) { if (!supportsTools) { // Model doesn't support tools - don't inject them toolsToSend = null; - } else if (injectToolsOllama && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { + } else if (injectToolsOllama && !body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { // Model supports tools and none provided - inject them toolsToSend = STANDARD_TOOLS; toolsInjected = true; @@ -411,7 +411,7 @@ async function invokeOpenRouter(body) { let toolsToSend = body.tools; let toolsInjected = false; - if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) { + if (!body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { // Client didn't send tools (likely passthrough mode) - inject standard Claude Code tools toolsToSend = STANDARD_TOOLS; toolsInjected = true; @@ -492,7 +492,7 @@ async function invokeAzureOpenAI(body) { let toolsToSend = body.tools; let toolsInjected = false; - if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) { + if (!body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { // Client didn't send tools (likely passthrough mode) - inject standard Claude Code tools toolsToSend = STANDARD_TOOLS; toolsInjected = true; @@ -843,7 +843,7 @@ async function invokeOpenAI(body) { let toolsToSend = body.tools; let toolsInjected = false; - if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) { + if (!body._noToolInjection 
&& (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { // Client didn't send tools (likely passthrough mode) - inject standard Claude Code tools toolsToSend = STANDARD_TOOLS; toolsInjected = true; @@ -945,7 +945,7 @@ async function invokeLlamaCpp(body) { let toolsInjected = false; const injectToolsLlamacpp = process.env.INJECT_TOOLS_LLAMACPP !== "false"; - if (injectToolsLlamacpp && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { + if (injectToolsLlamacpp && !body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { toolsToSend = STANDARD_TOOLS; toolsInjected = true; logger.info({ @@ -1028,7 +1028,7 @@ async function invokeLMStudio(body) { let toolsToSend = body.tools; let toolsInjected = false; - if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) { + if (!body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { toolsToSend = STANDARD_TOOLS; toolsInjected = true; logger.info({ @@ -1075,7 +1075,7 @@ async function invokeBedrock(body) { let toolsToSend = body.tools; let toolsInjected = false; - if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) { + if (!body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { toolsToSend = STANDARD_TOOLS; toolsInjected = true; logger.info({ @@ -1359,7 +1359,7 @@ async function invokeZai(body) { zaiBody.model = mappedModel; // Inject standard tools if client didn't send any (passthrough mode) - if (!Array.isArray(zaiBody.tools) || zaiBody.tools.length === 0) { + if (!body._noToolInjection && (!Array.isArray(zaiBody.tools) || zaiBody.tools.length === 0)) { zaiBody.tools = STANDARD_TOOLS; logger.info({ injectedToolCount: STANDARD_TOOLS.length, diff --git a/src/clients/standard-tools.js b/src/clients/standard-tools.js index 61ac791..6cfd833 100644 --- a/src/clients/standard-tools.js +++ b/src/clients/standard-tools.js @@ -24,7 +24,7 @@ const STANDARD_TOOLS = [ }, { name: "Read", - description: "Reads a file from the local filesystem. You can access any file directly by using this tool. For files outside the workspace, the user must approve access first.", + description: "Reads a file from the local filesystem. You can access any file directly by using this tool.\n\nEXTERNAL FILE APPROVAL FLOW: When reading a file outside the workspace, the tool will return an [APPROVAL REQUIRED] message instead of the file content. When this happens you MUST: (1) Tell the user the file is outside the workspace and ask for permission. (2) If the user approves, call this tool again with the SAME file_path and set user_approved=true. (3) Only then will the file content be returned.", input_schema: { type: "object", properties: { diff --git a/src/context/compression.js b/src/context/compression.js index 518aaba..47b0413 100644 --- a/src/context/compression.js +++ b/src/context/compression.js @@ -2,24 +2,63 @@ * History Compression for Token Optimization * * Compresses conversation history to reduce token usage while - * maintaining context quality. Uses sliding window approach: - * - Keep recent turns verbatim - * - Summarize older turns - * - Compress tool results + * maintaining context quality. Uses sliding window approach with + * percentage-based tiered compression that scales with recency + * and the model's context window size. 
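For a sense of scale, a worked example of the new caps, assuming a 32K-token context window and using the tier ratios and the ~4 chars/token heuristic defined in computeMaxCap / computeLimit below:

    // contextChars = 32,000 tokens * 4 chars/token = 128,000 chars
    // veryRecent cap: 128,000 * 0.25 = 32,000 chars
    // recent cap:     128,000 * 0.10 = 12,800 chars
    // old cap:        128,000 * 0.03 =  3,840 chars
    //
    // A 10,000-char tool result in the "recent" tier therefore keeps
    // min(12,800, max(300, floor(10,000 * 0.5))) = 5,000 chars.
    // With an unknown window (contextWindowTokens === -1) the fallback is
    // 8,000 tokens, i.e. caps of 8,000 / 3,200 / 960 chars respectively.
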
* + * Tiers: + * - veryRecent (last 4 messages): keep 90% of content + * - recent (messages 5-10): keep 50% of content + * - old (11+): keep 20% of content */ const logger = require('../logger'); const config = require('../config'); +// Compression tiers: ratio = percentage of content to keep, minFloor = minimum chars +const COMPRESSION_TIERS = { + veryRecent: { ratio: 0.9, minFloor: 500 }, + recent: { ratio: 0.5, minFloor: 300 }, + old: { ratio: 0.2, minFloor: 200 }, +}; + +// How many of the recent messages count as "very recent" +const VERY_RECENT_COUNT = 4; + /** - * Compress conversation history to fit within token budget + * Compute the maximum character cap for a tier based on context window size. + * + * @param {number} contextWindowTokens - Model's context window in tokens (-1 = unknown) + * @param {string} tierName - "veryRecent", "recent", or "old" + * @returns {number} Maximum characters for tool result content in this tier + */ +function computeMaxCap(contextWindowTokens, tierName) { + // Convert tokens to chars (~4 chars/token), default to 8K tokens if unknown + const contextChars = (contextWindowTokens === -1 ? 8000 : contextWindowTokens) * 4; + const budgetRatios = { + veryRecent: 0.25, + recent: 0.10, + old: 0.03, + }; + return Math.floor(contextChars * (budgetRatios[tierName] ?? 0.03)); +} + +/** + * Compute the character limit for a piece of content based on tier and context window. * - * Strategy: - * 1. Keep last N turns verbatim (fresh context) - * 2. Summarize older turns (compressed history) - * 3. Compress tool results to key information only - * 4. Remove redundant exchanges + * @param {string} text - The text content + * @param {string} tierName - Tier name + * @param {number} contextWindowTokens - Context window in tokens + * @returns {number} Character limit + */ +function computeLimit(text, tierName, contextWindowTokens) { + const tier = COMPRESSION_TIERS[tierName] || COMPRESSION_TIERS.old; + const maxCap = computeMaxCap(contextWindowTokens, tierName); + return Math.min(maxCap, Math.max(tier.minFloor, Math.floor(text.length * tier.ratio))); +} + +/** + * Compress conversation history to fit within token budget * * @param {Array} messages - Conversation history * @param {Object} options - Compression options @@ -28,6 +67,8 @@ const config = require('../config'); function compressHistory(messages, options = {}) { if (!messages || messages.length === 0) return messages; + const contextWindowTokens = options.contextWindowTokens ?? -1; + const opts = { keepRecentTurns: options.keepRecentTurns ?? config.historyCompression?.keepRecentTurns ?? 10, summarizeOlder: options.summarizeOlder ?? config.historyCompression?.summarizeOlder ?? true, @@ -58,12 +99,16 @@ function compressHistory(messages, options = {}) { compressed.push(summary); } } else { - // Just compress tool results in old messages - compressed = oldMessages.map(msg => compressMessage(msg)); + // Compress tool results in old messages using "old" tier + compressed = oldMessages.map(msg => compressMessage(msg, "old", contextWindowTokens)); } - // Add recent messages (may compress tool results but keep content) - const recentCompressed = recentMessages.map(msg => compressToolResults(msg)); + // Add recent messages with tiered compression + const recentCompressed = recentMessages.map((msg, i) => { + const isVeryRecent = i >= recentMessages.length - VERY_RECENT_COUNT; + const tierName = isVeryRecent ? 
"veryRecent" : "recent"; + return compressToolResults(msg, tierName, contextWindowTokens); + }); const finalMessages = [...compressed, ...recentCompressed]; @@ -82,7 +127,8 @@ function compressHistory(messages, options = {}) { percentage: ((saved / originalLength) * 100).toFixed(1), splitIndex, oldMessages: oldMessages.length, - recentMessages: recentMessages.length + recentMessages: recentMessages.length, + contextWindowTokens, }, 'History compression applied'); } @@ -149,26 +195,28 @@ function summarizeOldHistory(messages) { } /** - * Compress a single message - * - * Reduces message size while preserving essential information. + * Compress a single message (used for old messages outside the recent window) * * @param {Object} message - Message to compress + * @param {string} tierName - Compression tier + * @param {number} contextWindowTokens - Context window in tokens * @returns {Object} Compressed message */ -function compressMessage(message) { +function compressMessage(message, tierName = "old", contextWindowTokens = -1) { if (!message) return message; + const limit = computeLimit("x".repeat(300), tierName, contextWindowTokens); + const compressed = { role: message.role }; // Compress content based on type if (typeof message.content === 'string') { - compressed.content = compressText(message.content, 300); + compressed.content = compressText(message.content, limit); } else if (Array.isArray(message.content)) { compressed.content = message.content - .map(block => compressContentBlock(block)) + .map(block => compressContentBlock(block, tierName, contextWindowTokens)) .filter(Boolean); } else { compressed.content = message.content; @@ -180,13 +228,12 @@ function compressMessage(message) { /** * Compress tool results in a message while keeping other content * - * Tool results can be very large. This compresses them while - * keeping user and assistant text intact. 
- * * @param {Object} message - Message to process + * @param {string} tierName - Compression tier + * @param {number} contextWindowTokens - Context window in tokens * @returns {Object} Message with compressed tool results */ -function compressToolResults(message) { +function compressToolResults(message, tierName = "recent", contextWindowTokens = -1) { if (!message) return message; const compressed = { @@ -199,7 +246,7 @@ function compressToolResults(message) { compressed.content = message.content.map(block => { // Compress tool_result blocks if (block.type === 'tool_result') { - return compressToolResultBlock(block); + return compressToolResultBlock(block, tierName, contextWindowTokens); } // Keep other blocks as-is return block; @@ -215,16 +262,20 @@ function compressToolResults(message) { * Compress a content block * * @param {Object} block - Content block + * @param {string} tierName - Compression tier + * @param {number} contextWindowTokens - Context window in tokens * @returns {Object|null} Compressed block or null if removed */ -function compressContentBlock(block) { +function compressContentBlock(block, tierName = "old", contextWindowTokens = -1) { if (!block) return null; + const limit = computeLimit("x".repeat(300), tierName, contextWindowTokens); + switch (block.type) { case 'text': return { type: 'text', - text: compressText(block.text, 300) + text: compressText(block.text, limit) }; case 'tool_use': @@ -237,7 +288,7 @@ function compressContentBlock(block) { }; case 'tool_result': - return compressToolResultBlock(block); + return compressToolResultBlock(block, tierName, contextWindowTokens); default: return block; @@ -247,13 +298,15 @@ function compressContentBlock(block) { /** * Compress tool result block * - * Tool results can be very large (file contents, bash output). - * Compress while preserving essential information. + * Uses dynamic limits based on compression tier and context window size + * instead of a hardcoded character limit. 
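+ * Worked example (illustrative, assuming a 32K-token context window, i.e. roughly 128K chars):
+ * a 10,000-char tool result in the "recent" tier is kept to
+ * min(maxCap 12,800, max(minFloor 300, floor(10,000 * 0.5))) = 5,000 chars.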
* * @param {Object} block - tool_result block + * @param {string} tierName - Compression tier + * @param {number} contextWindowTokens - Context window in tokens * @returns {Object} Compressed tool_result */ -function compressToolResultBlock(block) { +function compressToolResultBlock(block, tierName = "old", contextWindowTokens = -1) { if (!block || block.type !== 'tool_result') return block; const compressed = { @@ -261,17 +314,20 @@ function compressToolResultBlock(block) { tool_use_id: block.tool_use_id, }; - // Compress content + // Compress content using dynamic limits if (typeof block.content === 'string') { - compressed.content = compressText(block.content, 500); + const limit = computeLimit(block.content, tierName, contextWindowTokens); + compressed.content = compressText(block.content, limit); } else if (Array.isArray(block.content)) { compressed.content = block.content.map(item => { if (typeof item === 'string') { - return compressText(item, 500); + const limit = computeLimit(item, tierName, contextWindowTokens); + return compressText(item, limit); } else if (item.type === 'text') { + const limit = computeLimit(item.text || "", tierName, contextWindowTokens); return { type: 'text', - text: compressText(item.text, 500) + text: compressText(item.text, limit) }; } return item; @@ -456,4 +512,6 @@ module.exports = { calculateCompressionStats, needsCompression, summarizeOldHistory, + COMPRESSION_TIERS, + computeMaxCap, }; diff --git a/src/orchestrator/index.js b/src/orchestrator/index.js index bb2f9c5..3e811c7 100644 --- a/src/orchestrator/index.js +++ b/src/orchestrator/index.js @@ -10,6 +10,7 @@ const tokens = require("../utils/tokens"); const systemPrompt = require("../prompts/system"); const historyCompression = require("../context/compression"); const tokenBudget = require("../context/budget"); +const { getContextWindow } = require("../providers/context-window"); const { classifyRequestType, selectToolsSmartly } = require("../tools/smart-selection"); const { compressMessages: headroomCompress, isEnabled: isHeadroomEnabled } = require("../headroom"); const { createAuditLogger } = require("../logger/audit-logger"); @@ -1049,6 +1050,7 @@ function sanitizePayload(payload) { const originalToolCount = Array.isArray(clean.tools) ? clean.tools.length : 0; delete clean.tools; delete clean.tool_choice; + clean._noToolInjection = true; logger.warn({ model: config.ollama?.model, message: "Removed tools for conversational message", @@ -1154,6 +1156,9 @@ function sanitizePayload(payload) { } clean.tools = selectedTools.length > 0 ? selectedTools : undefined; + if (!selectedTools.length) { + clean._noToolInjection = true; + } } clean.stream = payload.stream ?? 
false; @@ -1347,6 +1352,9 @@ async function runAgentLoop({ console.log('[DEBUG] runAgentLoop ENTERED - providerType:', providerType, 'messages:', cleanPayload.messages?.length); logger.info({ providerType, messageCount: cleanPayload.messages?.length }, 'runAgentLoop ENTERED'); const settings = resolveLoopOptions(options); + // Detect context window size for intelligent compression + const contextWindowTokens = await getContextWindow(); + console.log('[DEBUG] Context window detected:', contextWindowTokens, 'tokens for provider:', providerType); // Initialize audit logger (no-op if disabled) const auditLogger = createAuditLogger(config.audit); const start = Date.now(); @@ -1356,7 +1364,7 @@ async function runAgentLoop({ const toolCallNames = new Map(); const toolCallHistory = new Map(); // Track tool calls to detect loops: signature -> count let loopWarningInjected = false; // Track if we've already warned about loops - const accumulatedToolResults = []; // Track tool results to include in response for CLI display + let emptyResponseRetried = false; // Track if we've retried after an empty LLM response // Log agent loop start logger.info( @@ -1406,7 +1414,6 @@ async function runAgentLoop({ } steps += 1; - console.log('[LOOP DEBUG] Entered while loop - step:', steps); logger.debug( { sessionId: session?.id ?? null, @@ -1437,7 +1444,8 @@ async function runAgentLoop({ cleanPayload.messages = historyCompression.compressHistory(originalMessages, { keepRecentTurns: config.historyCompression?.keepRecentTurns ?? 10, summarizeOlder: config.historyCompression?.summarizeOlder ?? true, - enabled: true + enabled: true, + contextWindowTokens, }); if (cleanPayload.messages !== originalMessages) { @@ -1723,7 +1731,6 @@ IMPORTANT TOOL USAGE RULES: } const databricksResponse = await invokeModel(cleanPayload); - // Extract and log actual token usage const actualUsage = databricksResponse.ok && config.tokenTracking?.enabled !== false ? tokens.extractUsageFromResponse(databricksResponse.json) @@ -2007,6 +2014,67 @@ IMPORTANT TOOL USAGE RULES: } } + // === EMPTY RESPONSE DETECTION (primary) === + // Check raw extracted message for empty content before tool handling or conversion + const rawTextContent = (() => { + if (typeof message.content === 'string') return message.content.trim(); + if (Array.isArray(message.content)) { + return message.content + .filter(b => b.type === 'text') + .map(b => b.text || '') + .join('') + .trim(); + } + return ''; + })(); + + if (toolCalls.length === 0 && !rawTextContent) { + console.log('[EMPTY RESPONSE] No text content and no tool calls - step:', steps, 'retried:', emptyResponseRetried); + logger.warn({ + sessionId: session?.id ?? null, + step: steps, + messageKeys: Object.keys(message), + contentType: typeof message.content, + rawContentPreview: String(message.content || '').substring(0, 100), + }, "Empty LLM response detected (no text, no tool calls)"); + + // Retry once with a nudge + if (steps < settings.maxSteps && !emptyResponseRetried) { + emptyResponseRetried = true; + cleanPayload.messages.push({ + role: "assistant", + content: "", + }); + cleanPayload.messages.push({ + role: "user", + content: "Please provide a response to the user's message.", + }); + logger.info({ sessionId: session?.id ?? null }, "Retrying after empty response with nudge"); + continue; + } + + // Fallback after retry also returned empty + logger.warn({ sessionId: session?.id ?? 
null, steps }, "Empty response persisted after retry"); + return { + response: { + status: 200, + body: { + id: `msg_${Date.now()}`, + type: "message", + role: "assistant", + model: requestedModel, + content: [{ type: "text", text: "I wasn't able to generate a response. Could you try rephrasing your message?" }], + stop_reason: "end_turn", + usage: { input_tokens: 0, output_tokens: 0 }, + }, + terminationReason: "empty_response_fallback", + }, + steps, + durationMs: Date.now() - start, + terminationReason: "empty_response_fallback", + }; + } + if (toolCalls.length > 0) { // Convert OpenAI/OpenRouter format to Anthropic format for session storage let sessionContent; @@ -2506,6 +2574,15 @@ IMPORTANT TOOL USAGE RULES: providerType, }); + logger.debug( + { + id: execution.id ?? null, + name: execution.name ?? null, + arguments: execution.arguments ?? null, + content: execution.content ?? null, + is_error: execution.ok === false, + }, "executeToolCall response" ); + let toolMessage; if (providerType === "azure-anthropic") { const parsedContent = parseExecutionContent(execution.content); @@ -2594,39 +2671,6 @@ IMPORTANT TOOL USAGE RULES: }, }); - // Accumulate tool results for CLI display - // Build a standardized tool_result block in Anthropic format - logger.info({ - sessionId: session?.id ?? null, - callId: call.id, - executionId: execution.id, - toolName: call.function?.name ?? call.name ?? execution.name, - executionOk: execution.ok, - contentType: typeof execution.content, - accumulatedCountBefore: accumulatedToolResults.length - }, "=== ACCUMULATING TOOL RESULT FOR CLI - START ==="); - - const toolUseId = call.id ?? execution.id; - const toolResultContent = typeof execution.content === "string" - ? execution.content - : JSON.stringify(execution.content); - accumulatedToolResults.push({ - type: "tool_result", - tool_use_id: toolUseId, - tool_name: call.function?.name ?? call.name ?? execution.name, - content: toolResultContent, - is_error: execution.ok === false, - }); - - logger.info({ - sessionId: session?.id ?? null, - toolUseId, - toolName: call.function?.name ?? call.name ?? execution.name, - contentLength: toolResultContent.length, - contentPreview: toolResultContent.substring(0, 200), - accumulatedCountAfter: accumulatedToolResults.length - }, "=== ACCUMULATING TOOL RESULT FOR CLI - END ==="); - if (execution.ok) { logger.debug( { @@ -2746,52 +2790,7 @@ IMPORTANT TOOL USAGE RULES: lastMessageRole: cleanPayload.messages[cleanPayload.messages.length - 1]?.role, }, "Tool execution complete"); - // Return tool results directly to CLI - no more LLM call needed - // The tool result IS the answer (e.g., file contents for Read) - if (accumulatedToolResults.length > 0) { - logger.info({ - sessionId: session?.id ?? 
null, - toolResultCount: accumulatedToolResults.length, - toolNames: accumulatedToolResults.map(r => r.tool_name) - }, "=== RETURNING TOOL RESULTS DIRECTLY TO CLI ==="); - - // Convert tool_result blocks to text blocks for CLI display - // The CLI only understands text/tool_use in responses, not tool_result - const directResponse = { - id: `msg_${Date.now()}`, - type: "message", - role: "assistant", - content: accumulatedToolResults.map(r => ({ - type: "text", - text: r.content - })), - model: requestedModel, - stop_reason: "end_turn", - usage: { input_tokens: 0, output_tokens: 0 } - }; - - return { - response: { - status: 200, - body: directResponse, - terminationReason: "tool_result_direct", - }, - steps, - durationMs: Date.now() - start, - terminationReason: "tool_result_direct", - }; - } - - logger.info({ - sessionId: session?.id ?? null, - step: steps, - toolCallsExecuted: toolCallsExecuted, - totalToolCallsInThisStep: toolCalls.length, - messageCount: cleanPayload.messages.length, - lastMessageRole: cleanPayload.messages[cleanPayload.messages.length - 1]?.role, - }, "Tool execution complete - processing next toolCall"); - - continue; // Only if no tool results (shouldn't happen) + continue; // Loop back to invoke model with tool results in context } let anthropicPayload; @@ -3053,6 +3052,68 @@ IMPORTANT TOOL USAGE RULES: anthropicPayload.content = policy.sanitiseContent(anthropicPayload.content); } + // === EMPTY RESPONSE DETECTION (safety net — post-conversion) === + // Primary detection is earlier (before tool handling). This catches edge cases + // where conversion produces empty content from non-empty raw data. + const hasTextContent = (() => { + if (Array.isArray(anthropicPayload.content)) { + return anthropicPayload.content.some(b => b.type === "text" && b.text?.trim()); + } + if (typeof anthropicPayload.content === "string") { + return anthropicPayload.content.trim().length > 0; + } + return false; + })(); + + const hasToolUseBlocks = Array.isArray(anthropicPayload.content) && + anthropicPayload.content.some(b => b.type === "tool_use"); + + if (!hasToolUseBlocks && !hasTextContent) { + logger.warn({ + sessionId: session?.id ?? null, + step: steps, + messageKeys: Object.keys(anthropicPayload), + contentType: typeof anthropicPayload.content, + contentLength: Array.isArray(anthropicPayload.content) ? anthropicPayload.content.length : String(anthropicPayload.content || "").length, + }, "Empty LLM response detected (no text, no tool calls)"); + + // Retry once with a nudge + if (steps < settings.maxSteps && !emptyResponseRetried) { + emptyResponseRetried = true; + cleanPayload.messages.push({ + role: "assistant", + content: "", + }); + cleanPayload.messages.push({ + role: "user", + content: "Please provide a response to the user's message.", + }); + logger.info({ sessionId: session?.id ?? null }, "Retrying after empty response with nudge"); + continue; // Go back to top of while loop + } + + // If retry also returned empty, return a fallback message + logger.warn({ sessionId: session?.id ?? null, steps }, "Empty response persisted after retry"); + return { + response: { + status: 200, + body: { + id: `msg_${Date.now()}`, + type: "message", + role: "assistant", + model: requestedModel, + content: [{ type: "text", text: "I wasn't able to generate a response. Could you try rephrasing your message?" 
}], + stop_reason: "end_turn", + usage: { input_tokens: 0, output_tokens: 0 }, + }, + terminationReason: "empty_response_fallback", + }, + steps, + durationMs: Date.now() - start, + terminationReason: "empty_response_fallback", + }; + } + // Ensure content is an array before calling .find() const content = Array.isArray(anthropicPayload.content) ? anthropicPayload.content : []; const fallbackCandidate = content.find( @@ -3382,88 +3443,6 @@ IMPORTANT TOOL USAGE RULES: "Agent loop completed successfully", ); - // Include accumulated tool results in the response for CLI display - // This ensures the client sees actual tool output, not just LLM summaries - logger.info({ - sessionId: session?.id ?? null, - accumulatedToolResultsCount: accumulatedToolResults.length, - hasAnthropicPayload: !!anthropicPayload, - currentContentType: anthropicPayload?.content ? (Array.isArray(anthropicPayload.content) ? 'array' : typeof anthropicPayload.content) : 'none', - currentContentLength: anthropicPayload?.content?.length || 0 - }, "=== BEFORE TOOL RESULTS INCLUSION CHECK ==="); - - if (accumulatedToolResults.length > 0) { - logger.info({ - sessionId: session?.id ?? null, - toolResultCount: accumulatedToolResults.length, - toolNames: accumulatedToolResults.map(r => r.tool_name), - toolUseIds: accumulatedToolResults.map(r => r.tool_use_id) - }, "=== ENTERING TOOL RESULTS INCLUSION BLOCK ==="); - - // Ensure content is an array - if (!Array.isArray(anthropicPayload.content)) { - logger.info({ - sessionId: session?.id ?? null, - originalContentType: typeof anthropicPayload.content, - originalContentValue: anthropicPayload.content ? String(anthropicPayload.content).substring(0, 100) : 'null' - }, "=== CONTENT NOT ARRAY - CONVERTING ==="); - - anthropicPayload.content = anthropicPayload.content - ? [{ type: "text", text: String(anthropicPayload.content) }] - : []; - - logger.info({ - sessionId: session?.id ?? null, - convertedContentLength: anthropicPayload.content.length - }, "=== CONTENT CONVERTED TO ARRAY ==="); - } else { - logger.info({ - sessionId: session?.id ?? null, - existingContentLength: anthropicPayload.content.length, - existingContentTypes: anthropicPayload.content.map(b => b.type) - }, "=== CONTENT ALREADY ARRAY ==="); - } - - // Prepend tool results before text content so they appear in order - const contentBeforePrepend = anthropicPayload.content.length; - anthropicPayload.content = [...accumulatedToolResults, ...anthropicPayload.content]; - - logger.info({ - sessionId: session?.id ?? null, - toolResultCount: accumulatedToolResults.length, - toolNames: accumulatedToolResults.map(r => r.tool_name), - contentBeforePrepend, - contentAfterPrepend: anthropicPayload.content.length, - finalContentTypes: anthropicPayload.content.map(b => b.type) - }, "=== TOOL RESULTS PREPENDED TO RESPONSE ==="); - - for (const block of anthropicPayload.content) { - if (block.type === "tool_result") { - logger.info({ - toolName: block.tool_name, - content: typeof block.content === 'string' - ? block.content.substring(0, 500) - : JSON.stringify(block.content).substring(0, 500) - }, "=== TOOL RESULT CONTENT SENT TO CLI ==="); - } else if (block.type === "text") { - logger.info({ - text: block.text.substring(0, 500) - }, "=== TEXT CONTENT SENT TO CLI ==="); - } - } - - } else { - logger.info({ - sessionId: session?.id ?? null - }, "=== NO TOOL RESULTS TO INCLUDE (accumulatedToolResults empty) ==="); - } - - logger.info({ - sessionId: session?.id ?? 
null, - finalContentLength: anthropicPayload?.content?.length || 0, - finalContentTypes: anthropicPayload?.content?.map(b => b.type) || [] - }, "=== AFTER TOOL RESULTS INCLUSION CHECK ==="); - // DIAGNOSTIC: Log response being returned logger.info({ sessionId: session?.id ?? null, diff --git a/src/providers/context-window.js b/src/providers/context-window.js new file mode 100644 index 0000000..dcea89d --- /dev/null +++ b/src/providers/context-window.js @@ -0,0 +1,144 @@ +/** + * Context Window Detection + * + * Queries the active provider for its context window size (in tokens). + * Returns -1 if unknown. Caches the result for the lifetime of the process. + */ + +const config = require("../config"); +const logger = require("../logger"); + +// Known context sizes for proprietary models (tokens) +const KNOWN_CONTEXT_SIZES = { + // Anthropic + "claude-3-opus": 200000, + "claude-3-sonnet": 200000, + "claude-3-haiku": 200000, + "claude-3.5-sonnet": 200000, + "claude-4": 200000, + // OpenAI + "gpt-4o": 128000, + "gpt-4o-mini": 128000, + "gpt-4-turbo": 128000, + "gpt-4": 8192, + "gpt-3.5-turbo": 16385, +}; + +// null = not yet detected, -1 = detected but unknown, >0 = known +let cachedContextWindow = null; + +async function detectContextWindow() { + const provider = config.modelProvider.type; + + try { + if (provider === "ollama") { + return await detectOllamaContextWindow(); + } + if (provider === "openrouter") { + return await detectOpenRouterContextWindow(); + } + if (provider === "openai") { + return detectFromKnownSizes(config.openai.model); + } + // azure-anthropic, bedrock — use known Anthropic sizes + if (["azure-anthropic", "bedrock"].includes(provider)) { + return 200000; + } + if (provider === "azure-openai") { + return detectFromKnownSizes(config.azureOpenAI.deployment); + } + if (provider === "llamacpp" || provider === "lmstudio") { + return -1; // No standard API to query + } + if (provider === "zai") { + return 128000; // GLM-4 family + } + if (provider === "vertex") { + return 1000000; // Gemini models + } + } catch (err) { + logger.warn({ err, provider }, "Failed to detect context window"); + } + + return -1; +} + +async function detectOllamaContextWindow() { + const endpoint = `${config.ollama.endpoint}/api/show`; + const response = await fetch(endpoint, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ name: config.ollama.model }), + signal: AbortSignal.timeout(5000), + }); + if (!response.ok) return -1; + const data = await response.json(); + + // Ollama prefixes context_length with the architecture name + // (e.g. "llama.context_length", "qwen2.context_length", "gemma.context_length") + // Search for any key ending in ".context_length" or exactly "context_length" + if (data.model_info && typeof data.model_info === "object") { + for (const [key, value] of Object.entries(data.model_info)) { + if (key === "context_length" || key.endsWith(".context_length")) { + if (typeof value === "number" && value > 0) return value; + } + } + } + + // Fallback: parse from parameters string (e.g. 
"num_ctx 32768") + const match = data.parameters?.match(/num_ctx\s+(\d+)/); + if (match) return parseInt(match[1], 10); + return -1; +} + +async function detectOpenRouterContextWindow() { + const baseEndpoint = config.openrouter.endpoint || "https://openrouter.ai/api/v1/chat/completions"; + // Derive the models endpoint from the chat endpoint + const modelsEndpoint = baseEndpoint.replace(/\/v1\/chat\/completions$/, "/v1/models"); + const response = await fetch(modelsEndpoint, { + headers: { Authorization: `Bearer ${config.openrouter.apiKey}` }, + signal: AbortSignal.timeout(5000), + }); + if (!response.ok) return -1; + const data = await response.json(); + const model = data.data?.find((m) => m.id === config.openrouter.model); + return model?.context_length ?? -1; +} + +function detectFromKnownSizes(modelName) { + if (!modelName) return -1; + const lower = modelName.toLowerCase(); + for (const [key, size] of Object.entries(KNOWN_CONTEXT_SIZES)) { + if (lower.includes(key)) return size; + } + return -1; +} + +async function getContextWindow() { + if (cachedContextWindow !== null) return cachedContextWindow; + cachedContextWindow = await detectContextWindow(); + if (cachedContextWindow === -1) { + logger.warn( + { provider: config.modelProvider.type }, + "Could not detect context window size — falling back to 8K tokens. " + + "Compression may be more aggressive than necessary.", + ); + } else { + logger.info( + { contextWindow: cachedContextWindow, provider: config.modelProvider.type }, + "Context window detected", + ); + } + return cachedContextWindow; +} + +function resetCache() { + cachedContextWindow = null; +} + +module.exports = { + getContextWindow, + detectContextWindow, + resetCache, + KNOWN_CONTEXT_SIZES, +}; diff --git a/src/tools/index.js b/src/tools/index.js index 3d76777..95f4807 100644 --- a/src/tools/index.js +++ b/src/tools/index.js @@ -343,7 +343,31 @@ async function executeToolCall(call, context = {}) { }, context, ); - const formatted = normalizeHandlerResult(result); + let formatted = normalizeHandlerResult(result); + + // Auto-approve external file reads: the user already asked to read the file, + // so re-execute transparently with user_approved=true instead of relying + // on the LLM to manage a multi-step approval conversation. + if ( + formatted.content && + typeof formatted.content === "string" && + formatted.content.startsWith("[APPROVAL REQUIRED]") + ) { + logger.info( + { tool: normalisedCall.name, id: normalisedCall.id }, + "Auto-approving external file read (user initiated the request)", + ); + const approvedResult = await registered.handler( + { + id: normalisedCall.id, + name: normalisedCall.name, + args: { ...normalisedCall.arguments, user_approved: true }, + raw: normalisedCall.raw, + }, + context, + ); + formatted = normalizeHandlerResult(approvedResult); + } // Apply tool output truncation for token efficiency const truncatedContent = truncateToolOutput(normalisedCall.name, formatted.content); diff --git a/src/tools/indexer.js b/src/tools/indexer.js index 2db3504..bf13ca8 100644 --- a/src/tools/indexer.js +++ b/src/tools/indexer.js @@ -55,10 +55,62 @@ function registerWorkspaceListTool() { ); } +/** + * Search recent conversation context for content matching a query. + * + * Scans the last 10 messages for tool_result content that matches + * the query words. Returns matches sorted by relevance. 
+ * + * @param {string} query - Search query + * @param {Array} messages - Recent conversation messages + * @returns {Array} Matching context snippets + */ +function searchRecentContext(query, messages) { + if (!query || !messages || !Array.isArray(messages)) return []; + + const queryLower = query.toLowerCase(); + const queryWords = queryLower.split(/\s+/).filter((w) => w.length > 2); + if (queryWords.length === 0) return []; + + const matches = []; + + // Scan last 10 messages for tool_result content + const recent = messages.slice(-10); + for (const msg of recent) { + if (msg.role !== "tool" && msg.role !== "user") continue; + + const content = + typeof msg.content === "string" + ? msg.content + : Array.isArray(msg.content) + ? msg.content + .filter((b) => b.type === "tool_result" || b.type === "text") + .map((b) => b.content ?? b.text ?? "") + .join("\n") + : ""; + + if (!content || content.length < 20) continue; + + // Check if any query words appear in the content + const contentLower = content.toLowerCase(); + const matchCount = queryWords.filter((w) => contentLower.includes(w)).length; + + if (matchCount > 0 && matchCount / queryWords.length >= 0.3) { + matches.push({ + source: "conversation_context", + relevance: matchCount / queryWords.length, + preview: content.substring(0, 500), + }); + } + } + + return matches.sort((a, b) => b.relevance - a.relevance).slice(0, 3); +} + function registerWorkspaceSearchTool() { registerTool( "workspace_search", - async ({ args = {} }) => { + async ({ args = {} }, context = {}) => { const query = args.query ?? args.term ?? args.pattern; const regex = args.regex === true || args.is_regex === true; const limit = Number.isInteger(args.limit) ? args.limit : undefined; @@ -69,6 +121,9 @@ function registerWorkspaceSearchTool() { ? args.ignore : undefined; + // Check recent conversation context for matching content + const contextMatches = searchRecentContext(query, context.requestMessages); + const result = await searchWorkspace({ query, regex, @@ -76,12 +131,21 @@ function registerWorkspaceSearchTool() { ignore, }); + // Prepend context matches if found + if (contextMatches.length > 0) { + result.context_matches = contextMatches; + result.note = + "Results from recently read files are listed in context_matches. " + + "Prefer these over workspace matches when answering about previously read content."; + } + return { ok: true, status: 200, content: JSON.stringify(result, null, 2), metadata: { total: result.matches.length, + contextTotal: contextMatches.length, engine: result.engine, }, }; diff --git a/src/tools/workspace.js b/src/tools/workspace.js index 9971ba3..37933ae 100644 --- a/src/tools/workspace.js +++ b/src/tools/workspace.js @@ -43,13 +43,9 @@ function registerWorkspaceTools() { const expanded = expandTilde(targetPath); const resolved = path.resolve(expanded); return { - ok: false, - status: 403, - content: JSON.stringify({ - error: "external_path_requires_approval", - message: `The file "${targetPath}" resolves to "${resolved}" which is outside the workspace. You MUST ask the user for permission before reading this file. If the user approves, call this tool again with the same path and set user_approved to true.`, - resolved_path: resolved, - }), + ok: true, + status: 200, + content: `[APPROVAL REQUIRED] The file "${resolved}" is outside the workspace and cannot be read without user permission.\n\nYou must now ask the user: "The file ${resolved} is outside the workspace. 
May I read it?"\n\nIf the user says yes, call the Read tool again with file_path="${targetPath}" and user_approved=true.`, }; } // User approved — read external file From dbbf3787228c0a433471ec5e04077670576e847c Mon Sep 17 00:00:00 2001 From: bjoern Date: Tue, 10 Feb 2026 09:46:17 +0100 Subject: [PATCH 5/7] Add SUGGESTION_MODE_MODEL env var to control suggestion mode LLM calls Three concurrent runAgentLoop calls per user message waste GPU time with large models (~30s each). This adds SUGGESTION_MODE_MODEL config to skip ("none") or redirect suggestion mode to a lighter model. Also adds ISO timestamps and mode tags to debug logs for easier debugging. --- .env.example | 8 +++ src/clients/databricks.js | 8 +-- src/config/index.js | 9 +++ src/orchestrator/index.js | 138 +++++++++++++++++++++++++++++++++++++- 4 files changed, 157 insertions(+), 6 deletions(-) diff --git a/.env.example b/.env.example index a462e81..b85da41 100644 --- a/.env.example +++ b/.env.example @@ -212,6 +212,14 @@ WORKSPACE_INDEX_ENABLED=true # - client/passthrough: Return tool calls to CLI for local execution TOOL_EXECUTION_MODE=server +# Suggestion mode model override +# Controls which model handles suggestion mode (predicting next user input). +# Values: +# default - Use the same model as MODEL_PROVIDER (no change) +# none - Skip suggestion mode LLM calls entirely (saves GPU time) +# - Use a specific model (e.g. "llama3.1" for a lighter model) +SUGGESTION_MODE_MODEL=default + # Enable/disable automatic tool injection for local models INJECT_TOOLS_LLAMACPP=true INJECT_TOOLS_OLLAMA=true diff --git a/src/clients/databricks.js b/src/clients/databricks.js index 3f82be7..09fc176 100644 --- a/src/clients/databricks.js +++ b/src/clients/databricks.js @@ -309,7 +309,7 @@ async function invokeOllama(body) { } const ollamaBody = { - model: config.ollama.model, + model: body._suggestionModeModel || config.ollama.model, messages: deduplicated, stream: false, // Force non-streaming for Ollama - streaming format conversion not yet implemented options: { @@ -399,7 +399,7 @@ async function invokeOpenRouter(body) { } const openRouterBody = { - model: config.openrouter.model, + model: body._suggestionModeModel || config.openrouter.model, messages, temperature: body.temperature ?? 0.7, max_tokens: body.max_tokens ?? 4096, @@ -485,7 +485,7 @@ async function invokeAzureOpenAI(body) { max_tokens: Math.min(body.max_tokens ?? 4096, 16384), // Cap at Azure OpenAI's limit top_p: body.top_p ?? 1.0, stream: false, // Force non-streaming for Azure OpenAI - streaming format conversion not yet implemented - model: config.azureOpenAI.deployment + model: body._suggestionModeModel || config.azureOpenAI.deployment }; // Add tools - inject standard tools if client didn't send any (passthrough mode) @@ -831,7 +831,7 @@ async function invokeOpenAI(body) { } const openAIBody = { - model: config.openai.model || "gpt-4o", + model: body._suggestionModeModel || config.openai.model || "gpt-4o", messages, temperature: body.temperature ?? 0.7, max_tokens: body.max_tokens ?? 
4096, diff --git a/src/config/index.js b/src/config/index.js index d0b82ec..4c30825 100644 --- a/src/config/index.js +++ b/src/config/index.js @@ -134,6 +134,10 @@ const zaiModel = process.env.ZAI_MODEL?.trim() || "GLM-4.7"; const vertexApiKey = process.env.VERTEX_API_KEY?.trim() || process.env.GOOGLE_API_KEY?.trim() || null; const vertexModel = process.env.VERTEX_MODEL?.trim() || "gemini-2.0-flash"; +// Suggestion mode model override +// Values: "default" (use MODEL_DEFAULT), "none" (skip LLM call), or a model name +const suggestionModeModel = (process.env.SUGGESTION_MODE_MODEL ?? "default").trim(); + // Hot reload configuration const hotReloadEnabled = process.env.HOT_RELOAD_ENABLED !== "false"; // default true const hotReloadDebounceMs = Number.parseInt(process.env.HOT_RELOAD_DEBOUNCE_MS ?? "1000", 10); @@ -171,6 +175,9 @@ if (!["server", "client", "passthrough"].includes(toolExecutionMode)) { ); } console.log(`[CONFIG] Tool execution mode: ${toolExecutionMode}`); +if (suggestionModeModel.toLowerCase() !== "default") { + console.log(`[CONFIG] Suggestion mode model: ${suggestionModeModel}`); +} // Memory system configuration (Titans-inspired long-term memory) const memoryEnabled = process.env.MEMORY_ENABLED !== "false"; // default true @@ -595,6 +602,7 @@ var config = { modelProvider: { type: modelProvider, defaultModel, + suggestionModeModel, // Hybrid routing settings preferOllama, fallbackEnabled, @@ -884,6 +892,7 @@ function reloadConfig() { config.modelProvider.preferOllama = process.env.PREFER_OLLAMA === "true"; config.modelProvider.fallbackEnabled = process.env.FALLBACK_ENABLED !== "false"; config.modelProvider.fallbackProvider = (process.env.FALLBACK_PROVIDER ?? "databricks").toLowerCase(); + config.modelProvider.suggestionModeModel = (process.env.SUGGESTION_MODE_MODEL ?? "default").trim(); // Log level config.logger.level = process.env.LOG_LEVEL ?? "info"; diff --git a/src/orchestrator/index.js b/src/orchestrator/index.js index 3e811c7..167c97a 100644 --- a/src/orchestrator/index.js +++ b/src/orchestrator/index.js @@ -1253,6 +1253,15 @@ function sanitizePayload(payload) { toolCount: clean.tools?.length ?? 0 }, '[CONTEXT_FLOW] After sanitizePayload'); + // === Suggestion mode: tag request and override model if configured === + const { isSuggestionMode: isSuggestion } = detectSuggestionMode(clean.messages); + clean._requestMode = isSuggestion ? "suggestion" : "main"; + const smConfig = config.modelProvider?.suggestionModeModel ?? 
"default"; + if (isSuggestion && smConfig.toLowerCase() !== "default" && smConfig.toLowerCase() !== "none") { + clean.model = smConfig; + clean._suggestionModeModel = smConfig; + } + return clean; } @@ -1349,7 +1358,7 @@ async function runAgentLoop({ providerType, headers, }) { - console.log('[DEBUG] runAgentLoop ENTERED - providerType:', providerType, 'messages:', cleanPayload.messages?.length); + console.log('[DEBUG] runAgentLoop ENTERED - providerType:', providerType, 'messages:', cleanPayload.messages?.length, 'mode:', cleanPayload._requestMode || 'main', 'model:', cleanPayload.model); logger.info({ providerType, messageCount: cleanPayload.messages?.length }, 'runAgentLoop ENTERED'); const settings = resolveLoopOptions(options); // Detect context window size for intelligent compression @@ -1730,7 +1739,62 @@ IMPORTANT TOOL USAGE RULES: }); } + // === DEBUG: Log request to LLM === + console.log('\n[LLM REQUEST]', new Date().toISOString(), 'step:', steps, 'model:', cleanPayload.model, 'provider:', providerType, 'mode:', cleanPayload._requestMode || 'main'); + console.log('[LLM REQUEST] messages (' + (cleanPayload.messages?.length ?? 0) + '):'); + for (const m of (cleanPayload.messages || [])) { + const preview = typeof m.content === 'string' + ? m.content.substring(0, 200) + : Array.isArray(m.content) + ? m.content.map(b => b.type + ':' + (b.text || b.name || b.tool_use_id || '').substring(0, 80)).join(' | ') + : JSON.stringify(m.content).substring(0, 200); + console.log(' [' + m.role + '] ' + preview); + } + console.log('[LLM REQUEST] tools:', (cleanPayload.tools || []).map(t => t.name || t.function?.name).join(', ') || '(none)'); + console.log('[LLM REQUEST] _noToolInjection:', !!cleanPayload._noToolInjection); + const databricksResponse = await invokeModel(cleanPayload); + + // === DEBUG: Log response from LLM === + console.log('\n[LLM RESPONSE]', new Date().toISOString(), 'ok:', databricksResponse.ok, 'status:', databricksResponse.status, 'stream:', !!databricksResponse.stream, 'mode:', cleanPayload._requestMode || 'main'); + if (databricksResponse.json) { + const rj = databricksResponse.json; + // Anthropic format + if (rj.content) { + console.log('[LLM RESPONSE] Anthropic format - content blocks:', Array.isArray(rj.content) ? 
rj.content.length : typeof rj.content); + if (Array.isArray(rj.content)) { + for (const b of rj.content) { + if (b.type === 'text') console.log(' [text] ' + (b.text || '').substring(0, 300)); + else if (b.type === 'tool_use') console.log(' [tool_use] ' + b.name + '(' + JSON.stringify(b.input).substring(0, 200) + ')'); + else console.log(' [' + b.type + ']'); + } + } + console.log('[LLM RESPONSE] stop_reason:', rj.stop_reason); + } + // OpenAI format + if (rj.choices) { + const msg = rj.choices[0]?.message; + console.log('[LLM RESPONSE] OpenAI format - finish_reason:', rj.choices[0]?.finish_reason); + console.log(' [content] ' + (msg?.content || '(null)').substring(0, 300)); + if (msg?.tool_calls?.length) { + for (const tc of msg.tool_calls) { + console.log(' [tool_call] ' + (tc.function?.name || tc.name) + '(' + (tc.function?.arguments || '').substring(0, 200) + ')'); + } + } + } + // Ollama format + if (rj.message && !rj.choices && !rj.content) { + console.log('[LLM RESPONSE] Ollama format - done:', rj.done); + console.log(' [content] ' + (rj.message.content || '(empty)').substring(0, 300)); + if (rj.message.tool_calls?.length) { + for (const tc of rj.message.tool_calls) { + console.log(' [tool_call] ' + (tc.function?.name || tc.name) + '(' + JSON.stringify(tc.function?.arguments || {}).substring(0, 200) + ')'); + } + } + } + } else { + console.log('[LLM RESPONSE] no json body - raw:', String(databricksResponse.body || '').substring(0, 300)); + } // Extract and log actual token usage const actualUsage = databricksResponse.ok && config.tokenTracking?.enabled !== false ? tokens.extractUsageFromResponse(databricksResponse.json) @@ -2075,11 +2139,27 @@ IMPORTANT TOOL USAGE RULES: }; } + // Guard: drop hallucinated tool calls when no tools were sent to the model. + // Some models (e.g. Llama 3.1) hallucinate tool_call blocks from conversation + // history even when the request contained zero tool definitions. + const toolsWereSent = Array.isArray(cleanPayload.tools) && cleanPayload.tools.length > 0; + if (toolCalls.length > 0 && !toolsWereSent) { + console.log('[HALLUCINATION GUARD] Model returned', toolCalls.length, 'tool call(s) but no tools were offered — ignoring:', toolCalls.map(tc => tc.function?.name || tc.name)); + logger.warn({ + sessionId: session?.id ?? null, + step: steps, + hallucinated: toolCalls.map(tc => tc.function?.name || tc.name), + noToolInjection: !!cleanPayload._noToolInjection, + }, "Dropped hallucinated tool calls (no tools were sent to model)"); + toolCalls = []; + // If there's also no text content, treat as empty response (handled below) + } + if (toolCalls.length > 0) { // Convert OpenAI/OpenRouter format to Anthropic format for session storage let sessionContent; if (providerType === "azure-anthropic") { - // Azure Anthropic already returns content in Anthropic format + // Azure Anthropic already returns content in Anthropic sessionContent = databricksResponse.json?.content ?? []; } else { // Convert OpenAI/OpenRouter format to Anthropic content blocks @@ -3520,6 +3600,34 @@ IMPORTANT TOOL USAGE RULES: }; } +/** + * Detect if the current request is a suggestion mode call. + * Scans the last user message for the [SUGGESTION MODE: marker. 
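+ * Example (illustrative): a last user message such as
+ * "[SUGGESTION MODE: predict the user's next input] ..." is tagged as a suggestion
+ * request; depending on SUGGESTION_MODE_MODEL it is re-routed to a lighter model or skipped.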
+ * @param {Array} messages - The conversation messages + * @returns {{ isSuggestionMode: boolean }} + */ +function detectSuggestionMode(messages) { + if (!Array.isArray(messages) || messages.length === 0) { + return { isSuggestionMode: false }; + } + // Scan from the end to find the last user message + for (let i = messages.length - 1; i >= 0; i--) { + const msg = messages[i]; + if (msg?.role !== 'user') continue; + const content = typeof msg.content === 'string' + ? msg.content + : Array.isArray(msg.content) + ? msg.content.map(b => b.text || '').join(' ') + : ''; + if (content.includes('[SUGGESTION MODE:')) { + return { isSuggestionMode: true }; + } + // Only check the last user message + break; + } + return { isSuggestionMode: false }; +} + async function processMessage({ payload, headers, session, cwd, options = {} }) { const requestedModel = payload?.model ?? @@ -3529,6 +3637,32 @@ async function processMessage({ payload, headers, session, cwd, options = {} }) typeof headers?.["anthropic-beta"] === "string" && headers["anthropic-beta"].includes("interleaved-thinking"); + // === SUGGESTION MODE: Early return when SUGGESTION_MODE_MODEL=none === + const { isSuggestionMode } = detectSuggestionMode(payload?.messages); + const suggestionModelConfig = config.modelProvider?.suggestionModeModel ?? "default"; + if (isSuggestionMode && suggestionModelConfig.toLowerCase() === "none") { + console.log('[SUGGESTION MODE] Skipping LLM call (SUGGESTION_MODE_MODEL=none)'); + return { + response: { + json: { + id: `msg_suggestion_skip_${Date.now()}`, + type: "message", + role: "assistant", + content: [{ type: "text", text: "" }], + model: requestedModel, + stop_reason: "end_turn", + stop_sequence: null, + usage: { input_tokens: 0, output_tokens: 0 }, + }, + ok: true, + status: 200, + }, + steps: 0, + durationMs: 0, + terminationReason: "suggestion_mode_skip", + }; + } + // === TOOL LOOP GUARD (EARLY CHECK) === // Check BEFORE sanitization since sanitizePayload removes conversation history const toolLoopThreshold = config.policy?.toolLoopThreshold ?? 
3; From 8009074f1914790eb5580e4d14a1b1dbb0996676 Mon Sep 17 00:00:00 2001 From: bjoern Date: Tue, 10 Feb 2026 10:03:50 +0100 Subject: [PATCH 6/7] Handle Ollama offline gracefully with retry and 503 response - Add ECONNREFUSED to retryable errors and check undici TypeError .cause.code so connection-refused errors get retried with backoff - Wrap invokeModel in try/catch returning structured 503 with provider_unreachable error instead of raw TypeError bubbling to Express error middleware - Fix suggestion mode early return response shape (json -> body) to match router expectations --- src/clients/retry.js | 7 ++++++- src/orchestrator/index.js | 30 ++++++++++++++++++++++++++++-- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/src/clients/retry.js b/src/clients/retry.js index 2178206..5d90654 100644 --- a/src/clients/retry.js +++ b/src/clients/retry.js @@ -10,7 +10,7 @@ const DEFAULT_CONFIG = { backoffMultiplier: 2, jitterFactor: 0.1, // 10% jitter retryableStatuses: [429, 500, 502, 503, 504], - retryableErrors: ['ECONNRESET', 'ETIMEDOUT', 'ENOTFOUND', 'ENETUNREACH'], + retryableErrors: ['ECONNRESET', 'ETIMEDOUT', 'ENOTFOUND', 'ENETUNREACH', 'ECONNREFUSED'], }; /** @@ -44,6 +44,11 @@ function isRetryable(error, response, config) { return true; } + // Check nested cause (Node undici wraps connection errors as TypeError) + if (error && error.cause?.code && config.retryableErrors.includes(error.cause.code)) { + return true; + } + // Check for network errors if (error && (error.name === 'FetchError' || error.name === 'AbortError')) { return true; diff --git a/src/orchestrator/index.js b/src/orchestrator/index.js index 167c97a..6988e3b 100644 --- a/src/orchestrator/index.js +++ b/src/orchestrator/index.js @@ -1753,7 +1753,33 @@ IMPORTANT TOOL USAGE RULES: console.log('[LLM REQUEST] tools:', (cleanPayload.tools || []).map(t => t.name || t.function?.name).join(', ') || '(none)'); console.log('[LLM REQUEST] _noToolInjection:', !!cleanPayload._noToolInjection); - const databricksResponse = await invokeModel(cleanPayload); + let databricksResponse; + try { + databricksResponse = await invokeModel(cleanPayload); + } catch (modelError) { + const isConnectionError = modelError.cause?.code === 'ECONNREFUSED' + || modelError.message?.includes('fetch failed') + || modelError.code === 'ECONNREFUSED'; + if (isConnectionError) { + console.error(`[LLM ERROR] ${new Date().toISOString()} Provider ${providerType} is unreachable (connection refused). Is it running?`); + return { + response: { + status: 503, + body: { + error: { + type: "provider_unreachable", + message: `Provider ${providerType} is unreachable. 
Is the service running?`,
+          },
+        },
+        terminationReason: "provider_unreachable",
+      },
+      steps,
+      durationMs: Date.now() - start,
+      terminationReason: "provider_unreachable",
+    };
+  }
+  throw modelError;
+}
 
 // === DEBUG: Log response from LLM ===
 console.log('\n[LLM RESPONSE]', new Date().toISOString(), 'ok:', databricksResponse.ok, 'status:', databricksResponse.status, 'stream:', !!databricksResponse.stream, 'mode:', cleanPayload._requestMode || 'main');
@@ -3644,7 +3670,7 @@ async function processMessage({ payload, headers, session, cwd, options = {} })
     console.log('[SUGGESTION MODE] Skipping LLM call (SUGGESTION_MODE_MODEL=none)');
     return {
       response: {
-        json: {
+        body: {
           id: `msg_suggestion_skip_${Date.now()}`,
           type: "message",
           role: "assistant",

From 6bcc8037b2f82b471b114c762f74975e201bf2d3 Mon Sep 17 00:00:00 2001
From: bjoern
Date: Tue, 10 Feb 2026 16:52:56 +0100
Subject: [PATCH 7/7] Fix stripThinkingBlocks() destroying markdown bullet points in Ollama responses

The heuristic-based stripThinkingBlocks() matched standard markdown bullets
(- item, * item) as "thinking block markers" and dropped all subsequent
content. Replace it with stripThinkTags(), which only strips
<think>...</think> tags used by models like DeepSeek and Qwen for
chain-of-thought reasoning.
---
 src/config/index.js       |  13 +++-
 src/orchestrator/index.js | 155 +++++++++++++++++++++++++-------------
 2 files changed, 115 insertions(+), 53 deletions(-)

diff --git a/src/config/index.js b/src/config/index.js
index 4c30825..9da383a 100644
--- a/src/config/index.js
+++ b/src/config/index.js
@@ -1,7 +1,9 @@
 const path = require("path");
 const dotenv = require("dotenv");
 
-dotenv.config();
+// .env must be authoritative over shell env vars (e.g. stale exports in .bashrc).
+// Skip override in test mode so tests can set process.env before requiring config.
+dotenv.config({ override: process.env.NODE_ENV !== "test" });
 
 function trimTrailingSlash(value) {
   if (typeof value !== "string") return value;
@@ -138,6 +140,10 @@ const vertexModel = process.env.VERTEX_MODEL?.trim() || "gemini-2.0-flash";
 // Values: "default" (use MODEL_DEFAULT), "none" (skip LLM call), or a model name
 const suggestionModeModel = (process.env.SUGGESTION_MODE_MODEL ?? "default").trim();
 
+// Topic detection model override
+// Values: "default" (use main model) or a model name to redirect topic detection to a lighter model
+const topicDetectionModel = (process.env.TOPIC_DETECTION_MODEL ?? "default").trim();
+
 // Hot reload configuration
 const hotReloadEnabled = process.env.HOT_RELOAD_ENABLED !== "false"; // default true
 const hotReloadDebounceMs = Number.parseInt(process.env.HOT_RELOAD_DEBOUNCE_MS ?? "1000", 10);
@@ -178,6 +184,9 @@ console.log(`[CONFIG] Tool execution mode: ${toolExecutionMode}`);
 if (suggestionModeModel.toLowerCase() !== "default") {
   console.log(`[CONFIG] Suggestion mode model: ${suggestionModeModel}`);
 }
+if (topicDetectionModel.toLowerCase() !== "default") {
+  console.log(`[CONFIG] Topic detection model: ${topicDetectionModel}`);
+}
 
 // Memory system configuration (Titans-inspired long-term memory)
 const memoryEnabled = process.env.MEMORY_ENABLED !== "false"; // default true
@@ -603,6 +612,7 @@ var config = {
     type: modelProvider,
     defaultModel,
     suggestionModeModel,
+    topicDetectionModel,
     // Hybrid routing settings
     preferOllama,
     fallbackEnabled,
@@ -893,6 +903,7 @@ function reloadConfig() {
   config.modelProvider.fallbackEnabled = process.env.FALLBACK_ENABLED !== "false";
   config.modelProvider.fallbackProvider = (process.env.FALLBACK_PROVIDER ?? "databricks").toLowerCase();
   config.modelProvider.suggestionModeModel = (process.env.SUGGESTION_MODE_MODEL ?? "default").trim();
+  config.modelProvider.topicDetectionModel = (process.env.TOPIC_DETECTION_MODEL ?? "default").trim();
 
   // Log level
   config.logger.level = process.env.LOG_LEVEL ?? "info";
diff --git a/src/orchestrator/index.js b/src/orchestrator/index.js
index 6988e3b..3e07ac2 100644
--- a/src/orchestrator/index.js
+++ b/src/orchestrator/index.js
@@ -670,53 +670,11 @@ function normaliseToolChoice(choice) {
 }
 
 /**
- * Strip thinking-style reasoning from Ollama model outputs
- * Patterns to remove:
- * - Lines starting with bullet points (●, •, -, *)
- * - Explanatory reasoning before the actual response
- * - Multiple newlines used to separate thinking from response
+ * Strip <think>...</think> tags that some models (DeepSeek, Qwen) emit for chain-of-thought reasoning.
  */
-function stripThinkingBlocks(text) {
+function stripThinkTags(text) {
   if (typeof text !== "string") return text;
-
-  // Split into lines
-  const lines = text.split("\n");
-  const cleanedLines = [];
-  let inThinkingBlock = false;
-  let consecutiveEmptyLines = 0;
-
-  for (const line of lines) {
-    const trimmed = line.trim();
-
-    // Detect thinking block markers (bullet points followed by reasoning)
-    if (/^[●•\-\*]\s/.test(trimmed)) {
-      inThinkingBlock = true;
-      continue;
-    }
-
-    // Empty lines might separate thinking from response
-    if (trimmed === "") {
-      consecutiveEmptyLines++;
-      // If we've seen 2+ empty lines, likely end of thinking block
-      if (consecutiveEmptyLines >= 2) {
-        inThinkingBlock = false;
-      }
-      continue;
-    }
-
-    // Reset empty line counter
-    consecutiveEmptyLines = 0;
-
-    // Skip lines that are part of thinking block
-    if (inThinkingBlock) {
-      continue;
-    }
-
-    // Keep this line
-    cleanedLines.push(line);
-  }
-
-  return cleanedLines.join("\n").trim();
+  return text.replace(/<think>[\s\S]*?<\/think>/g, "").trim();
 }
 
 function ollamaToAnthropicResponse(ollamaResponse, requestedModel) {
@@ -733,7 +691,7 @@ function ollamaToAnthropicResponse(ollamaResponse, requestedModel) {
 
   // Add text content if present, after stripping thinking blocks
   if (typeof rawContent === "string" && rawContent.trim()) {
-    const cleanedContent = stripThinkingBlocks(rawContent);
+    const cleanedContent = stripThinkTags(rawContent);
     if (cleanedContent) {
       contentItems.push({ type: "text", text: cleanedContent });
     }
@@ -1262,6 +1220,19 @@ function sanitizePayload(payload) {
     clean._suggestionModeModel = smConfig;
   }
 
+  // === Topic detection: tag request and override model if configured ===
+  if (clean._requestMode === "main") {
+    const { isTopicDetection: isTopic } = detectTopicDetection(clean);
+    if (isTopic) {
+      clean._requestMode = "topic";
+      const tdConfig = config.modelProvider?.topicDetectionModel ??
"default"; + if (tdConfig.toLowerCase() !== "default") { + clean.model = tdConfig; + clean._topicDetectionModel = tdConfig; + } + } + } + return clean; } @@ -1371,7 +1342,7 @@ async function runAgentLoop({ let toolCallsExecuted = 0; let fallbackPerformed = false; const toolCallNames = new Map(); - const toolCallHistory = new Map(); // Track tool calls to detect loops: signature -> count + const toolCallHistory = new Map(); // Track tool calls to detect loops: signature -> counta let loopWarningInjected = false; // Track if we've already warned about loops let emptyResponseRetried = false; // Track if we've retried after an empty LLM response @@ -1801,25 +1772,25 @@ IMPORTANT TOOL USAGE RULES: if (rj.choices) { const msg = rj.choices[0]?.message; console.log('[LLM RESPONSE] OpenAI format - finish_reason:', rj.choices[0]?.finish_reason); - console.log(' [content] ' + (msg?.content || '(null)').substring(0, 300)); + console.log(' [content] ' + (msg?.content || '(null)')); if (msg?.tool_calls?.length) { for (const tc of msg.tool_calls) { - console.log(' [tool_call] ' + (tc.function?.name || tc.name) + '(' + (tc.function?.arguments || '').substring(0, 200) + ')'); + console.log(' [tool_call] ' + (tc.function?.name || tc.name) + '(' + (tc.function?.arguments || '') + ')'); } } } // Ollama format if (rj.message && !rj.choices && !rj.content) { console.log('[LLM RESPONSE] Ollama format - done:', rj.done); - console.log(' [content] ' + (rj.message.content || '(empty)').substring(0, 300)); + console.log(' [content] ' + (rj.message.content || '(empty)')); if (rj.message.tool_calls?.length) { for (const tc of rj.message.tool_calls) { - console.log(' [tool_call] ' + (tc.function?.name || tc.name) + '(' + JSON.stringify(tc.function?.arguments || {}).substring(0, 200) + ')'); + console.log(' [tool_call] ' + (tc.function?.name || tc.name) + '(' + JSON.stringify(tc.function?.arguments || {}) + ')'); } } } } else { - console.log('[LLM RESPONSE] no json body - raw:', String(databricksResponse.body || '').substring(0, 300)); + console.log('[LLM RESPONSE] no json body - raw:', String(databricksResponse.body || '')); } // Extract and log actual token usage const actualUsage = databricksResponse.ok && config.tokenTracking?.enabled !== false @@ -3654,6 +3625,86 @@ function detectSuggestionMode(messages) { return { isSuggestionMode: false }; } +/** + * Detect if the current request is a topic detection/classification call. + * These requests typically have a system prompt asking to classify conversation + * topics, with no tools and very short messages. They waste GPU time on large + * models (30-90s just to classify a topic). + * + * Detection heuristics: + * 1. System prompt contains topic classification instructions + * 2. No tools in the payload (topic detection never needs tools) + * 3. Short message count (typically 1-3 messages) + * + * @param {Object} payload - The request payload + * @returns {{ isTopicDetection: boolean }} + */ +function detectTopicDetection(payload) { + if (!payload) return { isTopicDetection: false }; + + // Topic detection requests have no tools + if (Array.isArray(payload.tools) && payload.tools.length > 0) { + return { isTopicDetection: false }; + } + + // Check system prompt for topic classification patterns + const systemText = typeof payload.system === 'string' + ? payload.system + : Array.isArray(payload.system) + ? 
payload.system.map(b => b.text || '').join(' ') + : ''; + + // Also check first message if system prompt is embedded there + let firstMsgText = ''; + if (Array.isArray(payload.messages) && payload.messages.length > 0) { + const first = payload.messages[0]; + if (first?.role === 'user' || first?.role === 'system') { + firstMsgText = typeof first.content === 'string' + ? first.content + : Array.isArray(first.content) + ? first.content.map(b => b.text || '').join(' ') + : ''; + } + } + + const combined = systemText + ' ' + firstMsgText; + const lc = combined.toLowerCase(); + + // Match patterns that Claude Code uses for topic detection + const topicPatterns = [ + 'new conversation topic', + 'topic change', + 'classify the topic', + 'classify this message', + 'conversation topic', + 'topic classification', + 'determines the topic', + 'determine the topic', + 'categorize the topic', + 'what topic', + 'identify the topic', + ]; + + const hasTopicPattern = topicPatterns.some(p => lc.includes(p)); + + if (hasTopicPattern) { + return { isTopicDetection: true }; + } + + // Additional heuristic: very short payload with no tools and system prompt + // mentioning "topic" or "classify" + if ( + !payload.tools && + Array.isArray(payload.messages) && + payload.messages.length <= 3 && + (lc.includes('topic') || lc.includes('classify')) + ) { + return { isTopicDetection: true }; + } + + return { isTopicDetection: false }; +} + async function processMessage({ payload, headers, session, cwd, options = {} }) { const requestedModel = payload?.model ??