From 2247bfafbc57f8cfcdf3fe8cf701009b7fd2e549 Mon Sep 17 00:00:00 2001 From: bjoern Date: Thu, 5 Feb 2026 21:05:45 +0100 Subject: [PATCH 1/2] Improve tool calling response handling - Add fallback parsing for Ollama models that return tool calls as JSON text in message content instead of using the structured tool_calls field - Return tool results directly to CLI instead of making a follow-up LLM call, reducing latency and preventing hallucinated rewrites of output - Add dedicated Glob tool returning plain text (one path per line) instead of JSON, with workspace_list accepting both 'pattern' and 'patterns' - Clarify why Glob is not aliased to workspace_list (format mismatch) --- src/clients/ollama-utils.js | 69 +++++++++++++++++++++++++++++++++++++ src/orchestrator/index.js | 47 ++++++++++++++++++++++++- src/tools/index.js | 4 +++ src/tools/indexer.js | 50 ++++++++++++++++++++++++--- 4 files changed, 165 insertions(+), 5 deletions(-) diff --git a/src/clients/ollama-utils.js b/src/clients/ollama-utils.js index 7582f05..2cd95c9 100644 --- a/src/clients/ollama-utils.js +++ b/src/clients/ollama-utils.js @@ -93,6 +93,65 @@ function convertAnthropicToolsToOllama(anthropicTools) { })); } +/** + * Extract tool call from text when LLM outputs JSON instead of using tool_calls + * Handles formats like: {"name": "Read", "parameters": {...}} + * + * @param {string} text - Text content that may contain JSON tool call + * @returns {object|null} - Tool call object in Ollama format, or null if not found + */ +function extractToolCallFromText(text) { + if (!text || typeof text !== 'string') return null; + + // Find potential JSON start - look for {"name" pattern + const startMatch = text.match(/\{\s*"name"\s*:/); + if (!startMatch) return null; + + const startIdx = startMatch.index; + + // Find matching closing brace using brace counting + let braceCount = 0; + let endIdx = -1; + for (let i = startIdx; i < text.length; i++) { + if (text[i] === '{') braceCount++; + else if (text[i] === 
'}') { + braceCount--; + if (braceCount === 0) { + endIdx = i + 1; + break; + } + } + } + + if (endIdx === -1) return null; + + const jsonStr = text.substring(startIdx, endIdx); + + try { + const parsed = JSON.parse(jsonStr); + + if (!parsed.name || !parsed.parameters) { + return null; + } + + logger.info({ + toolName: parsed.name, + params: parsed.parameters, + originalText: text.substring(0, 200) + }, "Extracted tool call from text content (fallback parsing)"); + + return { + function: { + name: parsed.name, + arguments: parsed.parameters + } + }; + } catch (e) { + logger.debug({ error: e.message, text: text.substring(0, 200) }, "Failed to parse extracted tool call"); + return null; + } +} + /** * Convert Ollama tool call response to Anthropic format * @@ -126,6 +185,15 @@ function convertOllamaToolCallsToAnthropic(ollamaResponse) { const toolCalls = message.tool_calls || []; const textContent = message.content || ""; + // FALLBACK: If no tool_calls but text contains JSON tool call, parse it + if (toolCalls.length === 0 && textContent) { + const extracted = extractToolCallFromText(textContent); + if (extracted) { + logger.info({ extractedTool: extracted.function?.name }, "Using fallback text parsing for tool call"); + toolCalls = [extracted]; + } + } + const contentBlocks = []; // Add text content if present @@ -217,4 +285,5 @@ module.exports = { convertOllamaToolCallsToAnthropic, buildAnthropicResponseFromOllama, modelNameSupportsTools, + extractToolCallFromText, }; diff --git a/src/orchestrator/index.js b/src/orchestrator/index.js index d553b69..9596db6 100644 --- a/src/orchestrator/index.js +++ b/src/orchestrator/index.js @@ -2562,7 +2562,52 @@ IMPORTANT TOOL USAGE RULES: } } - continue; + logger.info({ + sessionId: session?.id ?? 
null, + step: steps, + toolCallsExecuted: toolCallsExecuted, + totalToolCallsInThisStep: toolCalls.length, + messageCount: cleanPayload.messages.length, + lastMessageRole: cleanPayload.messages[cleanPayload.messages.length - 1]?.role, + }, "Tool execution complete"); + + // Return tool results directly to CLI - no more LLM call needed + // The tool result IS the answer (e.g., file contents for Read) + if (accumulatedToolResults.length > 0) { + auditLog("=== RETURNING TOOL RESULTS DIRECTLY TO CLI ===", { + sessionId: session?.id ?? null, + toolResultCount: accumulatedToolResults.length, + toolNames: accumulatedToolResults.map(r => r.tool_name) + }); + + // Convert tool_result blocks to text blocks for CLI display + // The CLI only understands text/tool_use in responses, not tool_result + const directResponse = { + id: `msg_${Date.now()}`, + type: "message", + role: "assistant", + content: accumulatedToolResults.map(r => ({ + type: "text", + text: r.content + })), + model: requestedModel, + stop_reason: "end_turn", + usage: { input_tokens: 0, output_tokens: 0 } + }; + + return { + response: { + status: 200, + body: directResponse, + terminationReason: "tool_result_direct", + }, + steps, + durationMs: Date.now() - start, + terminationReason: "tool_result_direct", + }; + } + + continue; // Only if no tool results (shouldn't happen) } let anthropicPayload; diff --git a/src/tools/index.js b/src/tools/index.js index 11227f0..a6a6296 100644 --- a/src/tools/index.js +++ b/src/tools/index.js @@ -88,6 +88,10 @@ const TOOL_ALIASES = { runtests: "workspace_test_run", testsummary: "workspace_test_summary", testhistory: "workspace_test_history", + // Glob has dedicated tool in src/tools/indexer.js (registerGlobTool) + // - returns plain text format instead of JSON + // glob: "workspace_list", + // Glob: "workspace_list", }; function coerceString(value) { diff --git a/src/tools/indexer.js b/src/tools/indexer.js index eb0a981..2db3504 100644 --- a/src/tools/indexer.js +++ 
b/src/tools/indexer.js @@ -16,11 +16,13 @@ function registerWorkspaceListTool() { registerTool( "workspace_list", async ({ args = {} }) => { + // Support both 'pattern' (Glob tool) and 'patterns' (workspace_list) + const rawPatterns = args.pattern ?? args.patterns; const patterns = - typeof args.patterns === "string" - ? [args.patterns] - : Array.isArray(args.patterns) - ? args.patterns + typeof rawPatterns === "string" + ? [rawPatterns] + : Array.isArray(rawPatterns) + ? rawPatterns : undefined; const ignore = typeof args.ignore === "string" @@ -260,6 +262,45 @@ function registerSymbolReferencesTool() { ); } + +/** + * Dedicated Glob tool for Claude Code compatibility (maybe others?). + * + * Why this exists (instead of using workspace_list alias): + * - Claude Code's Glob tool returns plain text (one path per line) + * - workspace_list returns JSON with entries array + * - Models expect plain text format from Glob tool + * + * See also: TOOL_ALIASES in src/tools/index.js (commented glob entries) + */ +function registerGlobTool() { + registerTool( + "Glob", + async ({ args = {} }) => { + const pattern = args.pattern; + const basePath = args.path; + + let patterns; + if (basePath) { + const cleanPath = basePath.replace(/\/+$/, ""); + patterns = pattern ? [`${cleanPath}/${pattern}`] : [`${cleanPath}/**/*`]; + } else { + patterns = pattern ? 
[pattern] : undefined; + } + + const entries = await listWorkspaceFiles({ patterns, limit: 1000 }); + + // Plain text output: one path per line (Claude Code format) + return { + ok: true, + status: 200, + content: entries.map((e) => e.path).join("\n"), + }; + }, + { category: "indexing" }, + ); +} + function registerGotoDefinitionTool() { registerTool( "workspace_goto_definition", @@ -353,6 +394,7 @@ function registerIndexerTools() { registerSymbolSearchTool(); registerSymbolReferencesTool(); registerGotoDefinitionTool(); + registerGlobTool(); } module.exports = { From 3008d49ad7393ecd30425bcbbf11105dcf1e8517 Mon Sep 17 00:00:00 2001 From: bjoern Date: Fri, 6 Feb 2026 18:29:59 +0100 Subject: [PATCH 2/2] Improve tool calling response handling - Additional logging for tool call parsing and execution - Hard-coded shell commands for reliable tool execution - Deduplication of tool calls within a single response - Collect and return results from all called tools - Ollama uses specified Ollama model - Fix double-serialized JSON parameters from some providers --- src/api/middleware/logging.js | 96 +++++++-- src/api/middleware/request-logging.js | 18 +- src/api/router.js | 77 ++++++- src/config/index.js | 3 + src/orchestrator/index.js | 297 +++++++++++++++++++++++++- src/tools/index.js | 128 ++++++++++- src/tools/stubs.js | 29 +++ src/tools/workspace.js | 4 +- 8 files changed, 614 insertions(+), 38 deletions(-) diff --git a/src/api/middleware/logging.js b/src/api/middleware/logging.js index e53faee..dc72b03 100644 --- a/src/api/middleware/logging.js +++ b/src/api/middleware/logging.js @@ -12,26 +12,92 @@ function maskHeaders(headers = {}) { return clone; } -const loggingMiddleware = pinoHttp({ +const baseLoggingMiddleware = pinoHttp({ logger, - customProps: (req) => ({ + autoLogging: false, // Disable automatic logging so we can log manually with bodies + customProps: (req, res) => ({ sessionId: req.sessionId ?? 
null, }), - customLogLevel: (req, res, err) => { - if (err || res.statusCode >= 500) return "error"; - if (res.statusCode >= 400) return "warn"; - return "info"; - }, - wrapSerializers: true, - serializers: { - req(req) { - return { +}); + +// Wrapper middleware to capture and log full request/response bodies +function loggingMiddleware(req, res, next) { + const startTime = Date.now(); + + // Log request with full body immediately + logger.info({ + sessionId: req.sessionId ?? null, + req: { + method: req.method, + url: req.url, + headers: maskHeaders(req.headers), + }, + requestBody: req.body, // Full request body without truncation + }, 'request started'); + + // Intercept res.write for streaming responses + const originalWrite = res.write; + const chunks = []; + res.write = function (chunk) { + if (chunk) { + chunks.push(Buffer.from(chunk)); + } + return originalWrite.apply(this, arguments); + }; + + // Intercept res.send to capture the body + const originalSend = res.send; + res.send = function (body) { + res._capturedBody = body; + + // Parse if it's a JSON string for better logging + if (typeof body === 'string') { + try { + res._capturedBody = JSON.parse(body); + } catch (e) { + res._capturedBody = body; + } + } + + return originalSend.call(this, body); + }; + + // Log response when finished + res.on('finish', () => { + const responseTime = Date.now() - startTime; + + // Capture streaming body if not already captured via send() + if (chunks.length > 0 && !res._capturedBody) { + const fullBody = Buffer.concat(chunks).toString('utf8'); + res._capturedBody = { + type: 'stream', + contentType: res.getHeader('content-type'), + size: fullBody.length, + preview: fullBody.substring(0, 1000) + }; + } + + const logLevel = res.statusCode >= 500 ? 'error' : res.statusCode >= 400 ? 'warn' : 'info'; + + logger[logLevel]({ + sessionId: req.sessionId ?? 
null, + req: { method: req.method, url: req.url, headers: maskHeaders(req.headers), - }; - }, - }, -}); + }, + res: { + statusCode: res.statusCode, + headers: res.getHeaders ? res.getHeaders() : res.headers, + }, + requestBody: req.body, // Full request body without truncation + responseBody: res._capturedBody, // Full response body without truncation + responseTime, + }, 'request completed'); + }); + + // Still call base middleware to set up req.log + baseLoggingMiddleware(req, res, next); +} module.exports = loggingMiddleware; diff --git a/src/api/middleware/request-logging.js b/src/api/middleware/request-logging.js index 8352e1a..cf2709e 100644 --- a/src/api/middleware/request-logging.js +++ b/src/api/middleware/request-logging.js @@ -25,13 +25,14 @@ function requestLoggingMiddleware(req, res, next) { // Add to response headers res.setHeader("X-Request-ID", requestId); - // Log request start +// Log request start with full body logger.info( { requestId, method: req.method, path: req.path || req.url, query: req.query, + body: req.body, // Full request body without truncation ip: req.ip || req.socket.remoteAddress, userAgent: req.headers["user-agent"], }, @@ -43,7 +44,18 @@ function requestLoggingMiddleware(req, res, next) { res.send = function (body) { const duration = Date.now() - startTime; - // Log request completion + // Parse body if it's a string + let responseBody = body; + if (typeof body === 'string') { + try { + responseBody = JSON.parse(body); + } catch (e) { + // Keep as string if not JSON + responseBody = body; + } + } + + // Log request completion with full request and response bodies logger.info( { requestId, @@ -52,6 +64,8 @@ function requestLoggingMiddleware(req, res, next) { status: res.statusCode, duration, contentLength: res.getHeader("content-length"), + requestBody: req.body, // Full request body for reference + responseBody, // Full response body without truncation }, "Request completed" ); diff --git a/src/api/router.js b/src/api/router.js 
index b3ed198..057341d 100644 --- a/src/api/router.js +++ b/src/api/router.js @@ -7,6 +7,7 @@ const openaiRouter = require("./openai-router"); const providersRouter = require("./providers-handler"); const { getRoutingHeaders, getRoutingStats, analyzeComplexity } = require("../routing"); const { validateCwd } = require("../workspace"); +const logger = require("../logger"); const router = express.Router(); @@ -121,6 +122,13 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { const wantsStream = Boolean(req.query?.stream === 'true' || req.body?.stream); const hasTools = Array.isArray(req.body?.tools) && req.body.tools.length > 0; + logger.info({ + sessionId: req.headers['x-claude-session-id'], + wantsStream, + hasTools, + willUseStreamingPath: wantsStream || hasTools + }, "=== REQUEST ROUTING DECISION ==="); + // Analyze complexity for routing headers (Phase 3) const complexity = analyzeComplexity(req.body); const routingHeaders = getRoutingHeaders({ @@ -338,6 +346,13 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { // Legacy streaming wrapper (for tool-based requests that requested streaming) if (wantsStream && hasTools) { + logger.info({ + sessionId: req.headers['x-claude-session-id'], + pathType: 'legacy_streaming_wrapper', + wantsStream, + hasTools + }, "=== USING LEGACY STREAMING WRAPPER (TOOL-BASED WITH STREAMING) ==="); + metrics.recordStreamingStart(); res.set({ "Content-Type": "text/event-stream", @@ -359,6 +374,13 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { // Use proper Anthropic SSE format const msg = result.body; + logger.info({ + sessionId: req.headers['x-claude-session-id'], + eventType: 'message_start', + streamingWithTools: true, + hasContent: !!(msg.content && msg.content.length > 0) + }, "=== SENDING SSE MESSAGE_START ==="); + // 1. 
message_start res.write(`event: message_start\n`); res.write(`data: ${JSON.stringify({ @@ -419,9 +441,52 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { res.write(`event: content_block_stop\n`); res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`); + } else if (block.type === "tool_result") { + // === TOOL_RESULT SSE STREAMING - ENTERED === + logger.info({ + blockIndex: i, + blockType: block.type, + toolUseId: block.tool_use_id, + contentType: typeof block.content, + contentLength: typeof block.content === 'string' ? block.content.length : JSON.stringify(block.content).length + }, "=== SSE: STREAMING TOOL_RESULT BLOCK - START ==="); + + // Stream tool_result blocks so CLI can display actual tool output + res.write(`event: content_block_start\n`); + res.write(`data: ${JSON.stringify({ + type: "content_block_start", + index: i, + content_block: { type: "tool_result", tool_use_id: block.tool_use_id, content: "" } + })}\n\n`); + + // Stream the actual content + const content = typeof block.content === 'string' + ? block.content + : JSON.stringify(block.content); + + logger.info({ + blockIndex: i, + contentLength: content.length, + contentPreview: content.substring(0, 200) + }, "=== SSE: STREAMING TOOL_RESULT CONTENT ==="); + + res.write(`event: content_block_delta\n`); + res.write(`data: ${JSON.stringify({ + type: "content_block_delta", + index: i, + delta: { type: "tool_result_delta", content: content } + })}\n\n`); + + res.write(`event: content_block_stop\n`); + res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`); + + // === TOOL_RESULT SSE STREAMING - COMPLETED === + logger.info({ + blockIndex: i, + toolUseId: block.tool_use_id + }, "=== SSE: STREAMING TOOL_RESULT BLOCK - END ==="); } } - // 3. 
message_delta with stop_reason res.write(`event: message_delta\n`); res.write(`data: ${JSON.stringify({ @@ -454,6 +519,16 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { }); } + + // DIAGNOSTIC: Log response being sent to client + logger.info({ + status: result.status, + hasBody: !!result.body, + bodyKeys: result.body ? Object.keys(result.body) : [], + bodyType: typeof result.body, + contentLength: result.body ? JSON.stringify(result.body).length : 0 + }, "=== SENDING RESPONSE TO CLIENT ==="); + metrics.recordResponse(result.status); res.status(result.status).send(result.body); } catch (error) { diff --git a/src/config/index.js b/src/config/index.js index 51cc548..d0b82ec 100644 --- a/src/config/index.js +++ b/src/config/index.js @@ -170,6 +170,7 @@ if (!["server", "client", "passthrough"].includes(toolExecutionMode)) { "TOOL_EXECUTION_MODE must be one of: server, client, passthrough (default: server)" ); } +console.log(`[CONFIG] Tool execution mode: ${toolExecutionMode}`); // Memory system configuration (Titans-inspired long-term memory) const memoryEnabled = process.env.MEMORY_ENABLED !== "false"; // default true @@ -342,6 +343,8 @@ const databricksUrl = ? `${rawBaseUrl}${endpointPath.startsWith("/") ? "" : "/"}${endpointPath}` : null; +// Set MODEL_DEFAULT env var to use a specific model (e.g. "llama3.1" for Ollama). +// Without it, the default falls back to a Databricks Claude model regardless of MODEL_PROVIDER. const defaultModel = process.env.MODEL_DEFAULT ?? (modelProvider === "azure-anthropic" ? 
"claude-opus-4-5" : "databricks-claude-sonnet-4-5"); diff --git a/src/orchestrator/index.js b/src/orchestrator/index.js index 9596db6..bb2f9c5 100644 --- a/src/orchestrator/index.js +++ b/src/orchestrator/index.js @@ -919,6 +919,10 @@ function sanitizePayload(payload) { : "claude-opus-4-5"; clean.model = azureDefaultModel; } else if (providerType === "ollama") { + // Override client model with Ollama config model + const ollamaConfiguredModel = config.ollama?.model; + clean.model = ollamaConfiguredModel; + // Ollama format conversion // Check if model supports tools const { modelNameSupportsTools } = require("../clients/ollama-utils"); @@ -1024,8 +1028,15 @@ function sanitizePayload(payload) { } // Very short messages (< 20 chars) without code/technical keywords + // BUT: Common shell commands should NOT be treated as conversational + const shellCommands = /^(pwd|ls|cd|cat|echo|grep|find|ps|top|df|du|whoami|which|env)[\s\.\!\?]*$/; + if (shellCommands.test(trimmed)) { + logger.info({ matched: "shell_command", trimmed }, "Ollama conversational check - SHELL COMMAND detected, keeping tools"); + return false; // NOT conversational - needs tools! + } + if (trimmed.length < 20 && !/code|file|function|error|bug|fix|write|read|create/.test(trimmed)) { - logger.debug({ matched: "short", trimmed, length: trimmed.length }, "Ollama conversational check - matched"); + logger.warn({ matched: "short", trimmed, length: trimmed.length }, "Ollama conversational check - SHORT MESSAGE matched, DELETING TOOLS"); return true; } @@ -1035,13 +1046,15 @@ function sanitizePayload(payload) { if (isConversational) { // Strip all tools for simple conversational messages + const originalToolCount = Array.isArray(clean.tools) ? 
clean.tools.length : 0; delete clean.tools; delete clean.tool_choice; - logger.debug({ + logger.warn({ model: config.ollama?.model, - message: "Removed tools for conversational message" - }, "Ollama conversational mode"); - } else if (modelSupportsTools && Array.isArray(clean.tools) && clean.tools.length > 0) { + message: "Removed tools for conversational message", + originalToolCount, + userMessage: clean.messages?.[clean.messages.length - 1]?.content?.substring(0, 50), + }, "Ollama conversational mode - ALL TOOLS DELETED!"); } else if (modelSupportsTools && Array.isArray(clean.tools) && clean.tools.length > 0) { // Ollama performance degrades with too many tools // Limit to essential tools only const OLLAMA_ESSENTIAL_TOOLS = new Set([ @@ -1052,7 +1065,8 @@ function sanitizePayload(payload) { "Glob", "Grep", "WebSearch", - "WebFetch" + "WebFetch", + "shell", // Tool is registered as "shell" internally ]); const limitedTools = clean.tools.filter(tool => @@ -1342,6 +1356,20 @@ async function runAgentLoop({ const toolCallNames = new Map(); const toolCallHistory = new Map(); // Track tool calls to detect loops: signature -> count let loopWarningInjected = false; // Track if we've already warned about loops + const accumulatedToolResults = []; // Track tool results to include in response for CLI display + + // Log agent loop start + logger.info( + { + sessionId: session?.id ?? null, + model: requestedModel, + maxSteps: settings.maxSteps, + maxDurationMs: settings.maxDurationMs, + wantsThinking, + providerType, + }, + "Agent loop started", + ); while (steps < settings.maxSteps) { if (Date.now() - start > settings.maxDurationMs) { @@ -1761,6 +1789,15 @@ IMPORTANT TOOL USAGE RULES: }); } } + logger.info({ + messageContent: databricksResponse.json?.message?.content + ? (typeof databricksResponse.json.message.content === 'string' + ? 
databricksResponse.json.message.content.substring(0, 500) + : JSON.stringify(databricksResponse.json.message.content).substring(0, 500)) + : 'NO_CONTENT', + hasToolCalls: !!databricksResponse.json?.message?.tool_calls, + toolCallCount: databricksResponse.json?.message?.tool_calls?.length || 0 + }, "=== RAW LLM RESPONSE CONTENT ==="); // Handle streaming responses (pass through without buffering) if (databricksResponse.stream) { @@ -1860,11 +1897,13 @@ IMPORTANT TOOL USAGE RULES: _anthropic_block: block, })); - logger.debug( + logger.info( { sessionId: session?.id ?? null, + step: steps, contentBlocks: contentArray.length, toolCallsFound: toolCalls.length, + toolNames: toolCalls.map(tc => tc.function?.name || tc.name), stopReason: databricksResponse.json?.stop_reason, }, "Azure Anthropic response parsed", @@ -1874,6 +1913,98 @@ IMPORTANT TOOL USAGE RULES: const choice = databricksResponse.json?.choices?.[0]; message = choice?.message ?? {}; toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : []; + + // Deduplicate tool calls for OpenAI format too + if (toolCalls.length > 0) { + const uniqueToolCalls = []; + const seenSignatures = new Set(); + let duplicatesRemoved = 0; + + for (const call of toolCalls) { + const signature = getToolCallSignature(call); + if (!seenSignatures.has(signature)) { + seenSignatures.add(signature); + uniqueToolCalls.push(call); + } else { + duplicatesRemoved++; + logger.warn({ + sessionId: session?.id ?? null, + toolName: call.function?.name || call.name, + toolId: call.id, + signature: signature.substring(0, 32), + }, "Duplicate tool call removed (same tool with identical parameters in single response)"); + } + } + + toolCalls = uniqueToolCalls; + + logger.info( + { + sessionId: session?.id ?? 
null, + step: steps, + toolCallsFound: toolCalls.length, + duplicatesRemoved, + toolNames: toolCalls.map(tc => tc.function?.name || tc.name), + }, + "LLM Response: Tool calls requested (after deduplication)", + ); + } else if (providerType === "ollama") { + // Ollama format: { message: { role, content, tool_calls }, done } + message = databricksResponse.json?.message ?? {}; + toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : []; + + logger.info({ + hasMessage: !!databricksResponse.json?.message, + hasToolCalls: toolCalls.length > 0, + toolCallCount: toolCalls.length, + toolNames: toolCalls.map(tc => tc.function?.name), + done: databricksResponse.json?.done, + fullToolCalls: JSON.stringify(toolCalls), + fullResponseMessage: JSON.stringify(databricksResponse.json?.message) + }, "=== OLLAMA TOOL CALLS EXTRACTION ==="); + } else { + // OpenAI/Databricks format: { choices: [{ message: { tool_calls: [...] } }] } + const choice = databricksResponse.json?.choices?.[0]; + message = choice?.message ?? {}; + toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : []; + + // Deduplicate tool calls for OpenAI format too + if (toolCalls.length > 0) { + const uniqueToolCalls = []; + const seenSignatures = new Set(); + let duplicatesRemoved = 0; + + for (const call of toolCalls) { + const signature = getToolCallSignature(call); + + if (!seenSignatures.has(signature)) { + seenSignatures.add(signature); + uniqueToolCalls.push(call); + } else { + duplicatesRemoved++; + logger.warn({ + sessionId: session?.id ?? null, + toolName: call.function?.name || call.name, + toolId: call.id, + signature: signature.substring(0, 32), + }, "Duplicate tool call removed (same tool with identical parameters in single response)"); + } + } + + toolCalls = uniqueToolCalls; + + logger.info( + { + sessionId: session?.id ?? 
null, + step: steps, + toolCallsFound: toolCalls.length, + duplicatesRemoved, + toolNames: toolCalls.map(tc => tc.function?.name || tc.name), + }, + "LLM Response: Tool calls requested (after deduplication)", + ); + } + } } if (toolCalls.length > 0) { @@ -2129,6 +2260,7 @@ IMPORTANT TOOL USAGE RULES: session, cwd, requestMessages: cleanPayload.messages, + providerType, })) ); @@ -2175,6 +2307,15 @@ IMPORTANT TOOL USAGE RULES: cleanPayload.messages.push(toolMessage); + logger.info( + { + toolName: execution.name, + content: typeof toolMessage.content === 'string' + ? toolMessage.content.substring(0, 500) + : JSON.stringify(toolMessage.content).substring(0, 500) + }, "Tool result content sent to LLM", + ); + // Convert to Anthropic format for session storage let sessionToolResultContent; if (providerType === "azure-anthropic") { @@ -2362,6 +2503,7 @@ IMPORTANT TOOL USAGE RULES: session, cwd, requestMessages: cleanPayload.messages, + providerType, }); let toolMessage; @@ -2452,6 +2594,39 @@ IMPORTANT TOOL USAGE RULES: }, }); + // Accumulate tool results for CLI display + // Build a standardized tool_result block in Anthropic format + logger.info({ + sessionId: session?.id ?? null, + callId: call.id, + executionId: execution.id, + toolName: call.function?.name ?? call.name ?? execution.name, + executionOk: execution.ok, + contentType: typeof execution.content, + accumulatedCountBefore: accumulatedToolResults.length + }, "=== ACCUMULATING TOOL RESULT FOR CLI - START ==="); + + const toolUseId = call.id ?? execution.id; + const toolResultContent = typeof execution.content === "string" + ? execution.content + : JSON.stringify(execution.content); + accumulatedToolResults.push({ + type: "tool_result", + tool_use_id: toolUseId, + tool_name: call.function?.name ?? call.name ?? execution.name, + content: toolResultContent, + is_error: execution.ok === false, + }); + + logger.info({ + sessionId: session?.id ?? null, + toolUseId, + toolName: call.function?.name ?? call.name ?? 
execution.name, + contentLength: toolResultContent.length, + contentPreview: toolResultContent.substring(0, 200), + accumulatedCountAfter: accumulatedToolResults.length + }, "=== ACCUMULATING TOOL RESULT FOR CLI - END ==="); + if (execution.ok) { logger.debug( { @@ -2574,11 +2749,11 @@ IMPORTANT TOOL USAGE RULES: // Return tool results directly to CLI - no more LLM call needed // The tool result IS the answer (e.g., file contents for Read) if (accumulatedToolResults.length > 0) { - auditLog("=== RETURNING TOOL RESULTS DIRECTLY TO CLI ===", { + logger.info({ sessionId: session?.id ?? null, toolResultCount: accumulatedToolResults.length, toolNames: accumulatedToolResults.map(r => r.tool_name) - }); + }, "=== RETURNING TOOL RESULTS DIRECTLY TO CLI ==="); // Convert tool_result blocks to text blocks for CLI display // The CLI only understands text/tool_use in responses, not tool_result @@ -2607,6 +2782,15 @@ IMPORTANT TOOL USAGE RULES: }; } + logger.info({ + sessionId: session?.id ?? null, + step: steps, + toolCallsExecuted: toolCallsExecuted, + totalToolCallsInThisStep: toolCalls.length, + messageCount: cleanPayload.messages.length, + lastMessageRole: cleanPayload.messages[cleanPayload.messages.length - 1]?.role, + }, "Tool execution complete - processing next toolCall"); + continue; // Only if no tool results (shouldn't happen) } @@ -3054,6 +3238,7 @@ IMPORTANT TOOL USAGE RULES: session, cwd, requestMessages: cleanPayload.messages, + providerType, }); const toolResultMessage = createFallbackToolResultMessage(providerType, { @@ -3196,6 +3381,100 @@ IMPORTANT TOOL USAGE RULES: }, "Agent loop completed successfully", ); + + // Include accumulated tool results in the response for CLI display + // This ensures the client sees actual tool output, not just LLM summaries + logger.info({ + sessionId: session?.id ?? null, + accumulatedToolResultsCount: accumulatedToolResults.length, + hasAnthropicPayload: !!anthropicPayload, + currentContentType: anthropicPayload?.content ? 
(Array.isArray(anthropicPayload.content) ? 'array' : typeof anthropicPayload.content) : 'none', + currentContentLength: anthropicPayload?.content?.length || 0 + }, "=== BEFORE TOOL RESULTS INCLUSION CHECK ==="); + + if (accumulatedToolResults.length > 0) { + logger.info({ + sessionId: session?.id ?? null, + toolResultCount: accumulatedToolResults.length, + toolNames: accumulatedToolResults.map(r => r.tool_name), + toolUseIds: accumulatedToolResults.map(r => r.tool_use_id) + }, "=== ENTERING TOOL RESULTS INCLUSION BLOCK ==="); + + // Ensure content is an array + if (!Array.isArray(anthropicPayload.content)) { + logger.info({ + sessionId: session?.id ?? null, + originalContentType: typeof anthropicPayload.content, + originalContentValue: anthropicPayload.content ? String(anthropicPayload.content).substring(0, 100) : 'null' + }, "=== CONTENT NOT ARRAY - CONVERTING ==="); + + anthropicPayload.content = anthropicPayload.content + ? [{ type: "text", text: String(anthropicPayload.content) }] + : []; + + logger.info({ + sessionId: session?.id ?? null, + convertedContentLength: anthropicPayload.content.length + }, "=== CONTENT CONVERTED TO ARRAY ==="); + } else { + logger.info({ + sessionId: session?.id ?? null, + existingContentLength: anthropicPayload.content.length, + existingContentTypes: anthropicPayload.content.map(b => b.type) + }, "=== CONTENT ALREADY ARRAY ==="); + } + + // Prepend tool results before text content so they appear in order + const contentBeforePrepend = anthropicPayload.content.length; + anthropicPayload.content = [...accumulatedToolResults, ...anthropicPayload.content]; + + logger.info({ + sessionId: session?.id ?? 
null, + toolResultCount: accumulatedToolResults.length, + toolNames: accumulatedToolResults.map(r => r.tool_name), + contentBeforePrepend, + contentAfterPrepend: anthropicPayload.content.length, + finalContentTypes: anthropicPayload.content.map(b => b.type) + }, "=== TOOL RESULTS PREPENDED TO RESPONSE ==="); + + for (const block of anthropicPayload.content) { + if (block.type === "tool_result") { + logger.info({ + toolName: block.tool_name, + content: typeof block.content === 'string' + ? block.content.substring(0, 500) + : JSON.stringify(block.content).substring(0, 500) + }, "=== TOOL RESULT CONTENT SENT TO CLI ==="); + } else if (block.type === "text") { + logger.info({ + text: block.text.substring(0, 500) + }, "=== TEXT CONTENT SENT TO CLI ==="); + } + } + + } else { + logger.info({ + sessionId: session?.id ?? null + }, "=== NO TOOL RESULTS TO INCLUDE (accumulatedToolResults empty) ==="); + } + + logger.info({ + sessionId: session?.id ?? null, + finalContentLength: anthropicPayload?.content?.length || 0, + finalContentTypes: anthropicPayload?.content?.map(b => b.type) || [] + }, "=== AFTER TOOL RESULTS INCLUSION CHECK ==="); + + // DIAGNOSTIC: Log response being returned + logger.info({ + sessionId: session?.id ?? null, + status: 200, + hasBody: !!anthropicPayload, + bodyKeys: anthropicPayload ? Object.keys(anthropicPayload) : [], + contentType: anthropicPayload?.content ? (Array.isArray(anthropicPayload.content) ? 'array' : typeof anthropicPayload.content) : 'none', + contentLength: anthropicPayload?.content ? (Array.isArray(anthropicPayload.content) ? 
/**
 * Recursively parse string values that look like JSON arrays/objects.
 * Some providers double-serialize nested parameters (e.g. questions: "[{...}]"
 * instead of questions: [{...}]), which causes schema validation failures.
 *
 * Note: only object *values* are re-parsed. A string passed at the top level,
 * or sitting directly inside an array, is returned unchanged — this mirrors
 * the double-serialization pattern actually seen (named params as strings).
 *
 * @param {*} obj - Parsed tool arguments (object, array, or primitive).
 * @returns {*} A copy with stringified-JSON object values replaced by parsed values.
 */
function deepParseStringifiedJson(obj) {
  if (typeof obj !== "object" || obj === null) return obj;
  if (Array.isArray(obj)) return obj.map(deepParseStringifiedJson);

  const result = {};
  for (const [key, value] of Object.entries(obj)) {
    if (typeof value === "string") {
      const trimmed = value.trim();
      if (
        (trimmed.startsWith("[") && trimmed.endsWith("]")) ||
        (trimmed.startsWith("{") && trimmed.endsWith("}"))
      ) {
        try {
          result[key] = deepParseStringifiedJson(JSON.parse(trimmed));
          continue;
        } catch {
          // Not valid JSON, keep as string
        }
      }
    }
    result[key] =
      typeof value === "object" ? deepParseStringifiedJson(value) : value;
  }
  return result;
}

/**
 * Serialize a value for diagnostic logging without ever throwing.
 * JSON.stringify throws a TypeError on circular structures and on BigInt;
 * a crash inside a debug log line must never take down argument parsing.
 *
 * @param {*} value - Arbitrary value to serialize.
 * @returns {string} JSON text, or a placeholder when serialization fails.
 */
function safeStringify(value) {
  try {
    return JSON.stringify(value);
  } catch {
    return "[unserializable]";
  }
}

/**
 * Extract the arguments object from a provider tool call.
 * Ollama delivers `function.arguments` as an already-parsed object;
 * OpenAI-style providers deliver a JSON string. Either way the result is run
 * through deepParseStringifiedJson to undo double-serialized nested values.
 *
 * @param {object} call - Raw tool call from the provider.
 * @param {string|null} [providerType] - Provider hint ("ollama", ...); used
 *   only for log messages, never to change parsing behavior.
 * @returns {object} Parsed arguments, or {} when arguments are absent/unparseable.
 */
function parseArguments(call, providerType = null) {
  const raw = call?.function?.arguments;

  // DEBUG: Log full call structure for diagnosis.
  // safeStringify guards against circular provider payloads throwing here.
  logger.info({
    providerType,
    fullCall: safeStringify(call),
    hasFunction: !!call?.function,
    functionKeys: call?.function ? Object.keys(call.function) : [],
    argumentsType: typeof raw,
    argumentsValue: raw,
    argumentsIsNull: raw === null,
    argumentsIsUndefined: raw === undefined,
  }, "=== PARSING TOOL ARGUMENTS ===");

  // Ollama sends arguments as an object, OpenAI as a JSON string
  if (typeof raw === "object" && raw !== null) {
    if (providerType !== "ollama") {
      logger.warn({
        providerType,
        expectedProvider: "ollama",
        argumentsType: typeof raw,
        arguments: raw
      }, `Received object arguments but provider is ${providerType || "unknown"}, expected ollama format. Continuing with object.`);
    } else {
      logger.info({
        type: "object",
        arguments: raw
      }, "Tool arguments already parsed (Ollama format)");
    }
    return deepParseStringifiedJson(raw);
  }

  if (typeof raw !== "string" || raw.trim().length === 0) {
    logger.warn({
      argumentsType: typeof raw,
      argumentsEmpty: !raw || raw.trim().length === 0,
      providerType
    }, "Arguments not a string or empty - returning {}");
    return {};
  }

  try {
    const parsed = JSON.parse(raw);
    logger.info({ parsed }, "Parsed JSON string arguments");
    return deepParseStringifiedJson(parsed);
  } catch (err) {
    logger.warn({ err, raw }, "Failed to parse tool arguments");
    return {};
  }
}
"tool"}_${Date.now()}`; return { id, name, - arguments: parseArguments(call), + arguments: parseArguments(call, providerType), raw: call, }; } @@ -186,7 +258,8 @@ function listTools() { } async function executeToolCall(call, context = {}) { - const normalisedCall = normaliseToolCall(call); + const providerType = context?.providerType || context?.provider || null; + const normalisedCall = normaliseToolCall(call, providerType); let registered = registry.get(normalisedCall.name); if (!registered) { const aliasTarget = TOOL_ALIASES[normalisedCall.name.toLowerCase()]; @@ -229,6 +302,10 @@ async function executeToolCall(call, context = {}) { } if (!registered) { + logger.warn({ + tool: normalisedCall.name, + id: normalisedCall.id + }, "Tool not registered"); const content = coerceString({ error: "tool_not_registered", tool: normalisedCall.name, @@ -245,6 +322,17 @@ async function executeToolCall(call, context = {}) { }; } + // Log tool invocation with full details for debugging + logger.info({ + tool: normalisedCall.name, + id: normalisedCall.id, + args: normalisedCall.arguments, + argsKeys: Object.keys(normalisedCall.arguments || {}), + rawCall: JSON.stringify(normalisedCall.raw) + }, "=== EXECUTING TOOL ==="); + + const startTime = Date.now(); + try { const result = await registered.handler( { @@ -260,6 +348,18 @@ async function executeToolCall(call, context = {}) { // Apply tool output truncation for token efficiency const truncatedContent = truncateToolOutput(normalisedCall.name, formatted.content); + const durationMs = Date.now() - startTime; + + // Log successful execution + logger.info({ + tool: normalisedCall.name, + id: normalisedCall.id, + status: formatted.status, + durationMs, + outputLength: truncatedContent?.length || 0, + truncated: truncatedContent !== formatted.content + }, "Tool execution completed"); + return { id: normalisedCall.id, name: normalisedCall.name, @@ -271,11 +371,20 @@ async function executeToolCall(call, context = {}) { registered: true,
/**
 * Render an AskUserQuestion tool call as plain assistant-readable text.
 * Accepts `questions` as an array, a single question object, or a
 * double-serialized JSON string (some providers stringify nested params).
 *
 * Fixes over the naive formatter: missing `description`, `label`, or
 * `question` fields no longer render a literal "undefined", and a null
 * question entry no longer throws.
 *
 * @param {{args: object}} param0 - Tool invocation payload.
 * @returns {{ok: boolean, status: number, content: string}} Formatted text,
 *   one question per paragraph with numbered options.
 */
function askUserQuestionHandler({ args }) {
  let questions = args?.questions ?? [];

  // Providers sometimes double-serialize the questions array.
  if (typeof questions === "string") {
    try { questions = JSON.parse(questions); } catch { questions = []; }
  }

  if (!Array.isArray(questions)) questions = [questions];

  const lines = questions.map((q) => {
    const header = q?.header ? `[${q.header}] ` : "";
    const opts = (q?.options ?? [])
      .map((o, j) => {
        // Omit the separator entirely when no description is provided,
        // instead of printing "label — undefined".
        const desc = o?.description ? ` — ${o.description}` : "";
        return `  ${j + 1}. ${o?.label ?? ""}${desc}`;
      })
      .join("\n");
    return `${header}${q?.question ?? ""}\n${opts}`;
  });

  return {
    ok: true,
    status: 200,
    content: lines.join("\n\n"),
  };
}

/**
 * Register stub handlers for every tool in STUB_TOOLS that is not already
 * registered, plus a real AskUserQuestion handler that echoes the model's
 * question back as assistant output (so the CLI can surface it to the user).
 */
function registerStubTools() {
  STUB_TOOLS.forEach((tool) => {
    if (!hasTool(tool.name)) {
      registerTool(tool.name, createStubHandler(tool.name, tool.description), tool);
    }
  });

  if (!hasTool("AskUserQuestion")) {
    registerTool("AskUserQuestion", askUserQuestionHandler, {
      description: "Returns the model's question to the user as assistant output.",
    });
  }
}