diff --git a/.env.example b/.env.example index a462e81..b85da41 100644 --- a/.env.example +++ b/.env.example @@ -212,6 +212,14 @@ WORKSPACE_INDEX_ENABLED=true # - client/passthrough: Return tool calls to CLI for local execution TOOL_EXECUTION_MODE=server +# Suggestion mode model override +# Controls which model handles suggestion mode (predicting next user input). +# Values: +# default - Use the same model as MODEL_PROVIDER (no change) +# none - Skip suggestion mode LLM calls entirely (saves GPU time) +# - Use a specific model (e.g. "llama3.1" for a lighter model) +SUGGESTION_MODE_MODEL=default + # Enable/disable automatic tool injection for local models INJECT_TOOLS_LLAMACPP=true INJECT_TOOLS_OLLAMA=true diff --git a/src/api/middleware/logging.js b/src/api/middleware/logging.js index e53faee..dc72b03 100644 --- a/src/api/middleware/logging.js +++ b/src/api/middleware/logging.js @@ -12,26 +12,92 @@ function maskHeaders(headers = {}) { return clone; } -const loggingMiddleware = pinoHttp({ +const baseLoggingMiddleware = pinoHttp({ logger, - customProps: (req) => ({ + autoLogging: false, // Disable automatic logging so we can log manually with bodies + customProps: (req, res) => ({ sessionId: req.sessionId ?? null, }), - customLogLevel: (req, res, err) => { - if (err || res.statusCode >= 500) return "error"; - if (res.statusCode >= 400) return "warn"; - return "info"; - }, - wrapSerializers: true, - serializers: { - req(req) { - return { +}); + +// Wrapper middleware to capture and log full request/response bodies +function loggingMiddleware(req, res, next) { + const startTime = Date.now(); + + // Log request with full body immediately + logger.info({ + sessionId: req.sessionId ?? null, + req: { + method: req.method, + url: req.url, + headers: maskHeaders(req.headers), + }, + requestBody: req.body, // Full request body without truncation + }, 'request started'); + + // Intercept res.write for streaming responses + const originalWrite = res.write; + const chunks = []; + res.write = function (chunk) { + if (chunk) { + chunks.push(Buffer.from(chunk)); + } + return originalWrite.apply(this, arguments); + }; + + // Intercept res.send to capture the body + const originalSend = res.send; + res.send = function (body) { + res._capturedBody = body; + + // Parse if it's a JSON string for better logging + if (typeof body === 'string') { + try { + res._capturedBody = JSON.parse(body); + } catch (e) { + res._capturedBody = body; + } + } + + return originalSend.call(this, body); + }; + + // Log response when finished + res.on('finish', () => { + const responseTime = Date.now() - startTime; + + // Capture streaming body if not already captured via send() + if (chunks.length > 0 && !res._capturedBody) { + const fullBody = Buffer.concat(chunks).toString('utf8'); + res._capturedBody = { + type: 'stream', + contentType: res.getHeader('content-type'), + size: fullBody.length, + preview: fullBody.substring(0, 1000) + }; + } + + const logLevel = res.statusCode >= 500 ? 'error' : res.statusCode >= 400 ? 'warn' : 'info'; + + logger[logLevel]({ + sessionId: req.sessionId ?? null, + req: { method: req.method, url: req.url, headers: maskHeaders(req.headers), - }; - }, - }, -}); + }, + res: { + statusCode: res.statusCode, + headers: res.getHeaders ? 
res.getHeaders() : res.headers, + }, + requestBody: req.body, // Full request body without truncation + responseBody: res._capturedBody, // Full response body without truncation + responseTime, + }, 'request completed'); + }); + + // Still call base middleware to set up req.log + baseLoggingMiddleware(req, res, next); +} module.exports = loggingMiddleware; diff --git a/src/api/middleware/request-logging.js b/src/api/middleware/request-logging.js index 8352e1a..cf2709e 100644 --- a/src/api/middleware/request-logging.js +++ b/src/api/middleware/request-logging.js @@ -25,13 +25,14 @@ function requestLoggingMiddleware(req, res, next) { // Add to response headers res.setHeader("X-Request-ID", requestId); - // Log request start +// Log request start with full body logger.info( { requestId, method: req.method, path: req.path || req.url, query: req.query, + body: req.body, // Full request body without truncation ip: req.ip || req.socket.remoteAddress, userAgent: req.headers["user-agent"], }, @@ -43,7 +44,18 @@ function requestLoggingMiddleware(req, res, next) { res.send = function (body) { const duration = Date.now() - startTime; - // Log request completion + // Parse body if it's a string + let responseBody = body; + if (typeof body === 'string') { + try { + responseBody = JSON.parse(body); + } catch (e) { + // Keep as string if not JSON + responseBody = body; + } + } + + // Log request completion with full request and response bodies logger.info( { requestId, @@ -52,6 +64,8 @@ function requestLoggingMiddleware(req, res, next) { status: res.statusCode, duration, contentLength: res.getHeader("content-length"), + requestBody: req.body, // Full request body for reference + responseBody, // Full response body without truncation }, "Request completed" ); diff --git a/src/api/router.js b/src/api/router.js index b3ed198..057341d 100644 --- a/src/api/router.js +++ b/src/api/router.js @@ -7,6 +7,7 @@ const openaiRouter = require("./openai-router"); const providersRouter = require("./providers-handler"); const { getRoutingHeaders, getRoutingStats, analyzeComplexity } = require("../routing"); const { validateCwd } = require("../workspace"); +const logger = require("../logger"); const router = express.Router(); @@ -121,6 +122,13 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { const wantsStream = Boolean(req.query?.stream === 'true' || req.body?.stream); const hasTools = Array.isArray(req.body?.tools) && req.body.tools.length > 0; + logger.info({ + sessionId: req.headers['x-claude-session-id'], + wantsStream, + hasTools, + willUseStreamingPath: wantsStream || hasTools + }, "=== REQUEST ROUTING DECISION ==="); + // Analyze complexity for routing headers (Phase 3) const complexity = analyzeComplexity(req.body); const routingHeaders = getRoutingHeaders({ @@ -338,6 +346,13 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { // Legacy streaming wrapper (for tool-based requests that requested streaming) if (wantsStream && hasTools) { + logger.info({ + sessionId: req.headers['x-claude-session-id'], + pathType: 'legacy_streaming_wrapper', + wantsStream, + hasTools + }, "=== USING LEGACY STREAMING WRAPPER (TOOL-BASED WITH STREAMING) ==="); + metrics.recordStreamingStart(); res.set({ "Content-Type": "text/event-stream", @@ -359,6 +374,13 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { // Use proper Anthropic SSE format const msg = result.body; + logger.info({ + sessionId: req.headers['x-claude-session-id'], + eventType: 'message_start', + 
streamingWithTools: true, + hasContent: !!(msg.content && msg.content.length > 0) + }, "=== SENDING SSE MESSAGE_START ==="); + // 1. message_start res.write(`event: message_start\n`); res.write(`data: ${JSON.stringify({ @@ -419,9 +441,52 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { res.write(`event: content_block_stop\n`); res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`); + } else if (block.type === "tool_result") { + // === TOOL_RESULT SSE STREAMING - ENTERED === + logger.info({ + blockIndex: i, + blockType: block.type, + toolUseId: block.tool_use_id, + contentType: typeof block.content, + contentLength: typeof block.content === 'string' ? block.content.length : JSON.stringify(block.content).length + }, "=== SSE: STREAMING TOOL_RESULT BLOCK - START ==="); + + // Stream tool_result blocks so CLI can display actual tool output + res.write(`event: content_block_start\n`); + res.write(`data: ${JSON.stringify({ + type: "content_block_start", + index: i, + content_block: { type: "tool_result", tool_use_id: block.tool_use_id, content: "" } + })}\n\n`); + + // Stream the actual content + const content = typeof block.content === 'string' + ? block.content + : JSON.stringify(block.content); + + logger.info({ + blockIndex: i, + contentLength: content.length, + contentPreview: content.substring(0, 200) + }, "=== SSE: STREAMING TOOL_RESULT CONTENT ==="); + + res.write(`event: content_block_delta\n`); + res.write(`data: ${JSON.stringify({ + type: "content_block_delta", + index: i, + delta: { type: "tool_result_delta", content: content } + })}\n\n`); + + res.write(`event: content_block_stop\n`); + res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`); + + // === TOOL_RESULT SSE STREAMING - COMPLETED === + logger.info({ + blockIndex: i, + toolUseId: block.tool_use_id + }, "=== SSE: STREAMING TOOL_RESULT BLOCK - END ==="); } } - // 3. message_delta with stop_reason res.write(`event: message_delta\n`); res.write(`data: ${JSON.stringify({ @@ -454,6 +519,16 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { }); } + + // DIAGNOSTIC: Log response being sent to client + logger.info({ + status: result.status, + hasBody: !!result.body, + bodyKeys: result.body ? Object.keys(result.body) : [], + bodyType: typeof result.body, + contentLength: result.body ? 
JSON.stringify(result.body).length : 0 + }, "=== SENDING RESPONSE TO CLIENT ==="); + metrics.recordResponse(result.status); res.status(result.status).send(result.body); } catch (error) { diff --git a/src/clients/databricks.js b/src/clients/databricks.js index d2d0d03..09fc176 100644 --- a/src/clients/databricks.js +++ b/src/clients/databricks.js @@ -181,7 +181,7 @@ async function invokeDatabricks(body) { const databricksBody = { ...body }; // Inject standard tools if client didn't send any (passthrough mode) - if (!Array.isArray(databricksBody.tools) || databricksBody.tools.length === 0) { + if (!body._noToolInjection && (!Array.isArray(databricksBody.tools) || databricksBody.tools.length === 0)) { databricksBody.tools = STANDARD_TOOLS; logger.info({ injectedToolCount: STANDARD_TOOLS.length, @@ -222,7 +222,7 @@ async function invokeAzureAnthropic(body) { } // Inject standard tools if client didn't send any (passthrough mode) - if (!Array.isArray(body.tools) || body.tools.length === 0) { + if (!body._noToolInjection && (!Array.isArray(body.tools) || body.tools.length === 0)) { body.tools = STANDARD_TOOLS; logger.info({ injectedToolCount: STANDARD_TOOLS.length, @@ -309,7 +309,7 @@ async function invokeOllama(body) { } const ollamaBody = { - model: config.ollama.model, + model: body._suggestionModeModel || config.ollama.model, messages: deduplicated, stream: false, // Force non-streaming for Ollama - streaming format conversion not yet implemented options: { @@ -331,7 +331,7 @@ async function invokeOllama(body) { if (!supportsTools) { // Model doesn't support tools - don't inject them toolsToSend = null; - } else if (injectToolsOllama && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { + } else if (injectToolsOllama && !body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { // Model supports tools and none provided - inject them toolsToSend = STANDARD_TOOLS; toolsInjected = true; @@ -399,7 +399,7 @@ async function invokeOpenRouter(body) { } const openRouterBody = { - model: config.openrouter.model, + model: body._suggestionModeModel || config.openrouter.model, messages, temperature: body.temperature ?? 0.7, max_tokens: body.max_tokens ?? 4096, @@ -411,7 +411,7 @@ async function invokeOpenRouter(body) { let toolsToSend = body.tools; let toolsInjected = false; - if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) { + if (!body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { // Client didn't send tools (likely passthrough mode) - inject standard Claude Code tools toolsToSend = STANDARD_TOOLS; toolsInjected = true; @@ -485,14 +485,14 @@ async function invokeAzureOpenAI(body) { max_tokens: Math.min(body.max_tokens ?? 4096, 16384), // Cap at Azure OpenAI's limit top_p: body.top_p ?? 
1.0, stream: false, // Force non-streaming for Azure OpenAI - streaming format conversion not yet implemented - model: config.azureOpenAI.deployment + model: body._suggestionModeModel || config.azureOpenAI.deployment }; // Add tools - inject standard tools if client didn't send any (passthrough mode) let toolsToSend = body.tools; let toolsInjected = false; - if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) { + if (!body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { // Client didn't send tools (likely passthrough mode) - inject standard Claude Code tools toolsToSend = STANDARD_TOOLS; toolsInjected = true; @@ -831,7 +831,7 @@ async function invokeOpenAI(body) { } const openAIBody = { - model: config.openai.model || "gpt-4o", + model: body._suggestionModeModel || config.openai.model || "gpt-4o", messages, temperature: body.temperature ?? 0.7, max_tokens: body.max_tokens ?? 4096, @@ -843,7 +843,7 @@ async function invokeOpenAI(body) { let toolsToSend = body.tools; let toolsInjected = false; - if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) { + if (!body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { // Client didn't send tools (likely passthrough mode) - inject standard Claude Code tools toolsToSend = STANDARD_TOOLS; toolsInjected = true; @@ -945,7 +945,7 @@ async function invokeLlamaCpp(body) { let toolsInjected = false; const injectToolsLlamacpp = process.env.INJECT_TOOLS_LLAMACPP !== "false"; - if (injectToolsLlamacpp && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { + if (injectToolsLlamacpp && !body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { toolsToSend = STANDARD_TOOLS; toolsInjected = true; logger.info({ @@ -1028,7 +1028,7 @@ async function invokeLMStudio(body) { let toolsToSend = body.tools; let toolsInjected = false; - if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) { + if (!body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { toolsToSend = STANDARD_TOOLS; toolsInjected = true; logger.info({ @@ -1075,7 +1075,7 @@ async function invokeBedrock(body) { let toolsToSend = body.tools; let toolsInjected = false; - if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) { + if (!body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { toolsToSend = STANDARD_TOOLS; toolsInjected = true; logger.info({ @@ -1359,7 +1359,7 @@ async function invokeZai(body) { zaiBody.model = mappedModel; // Inject standard tools if client didn't send any (passthrough mode) - if (!Array.isArray(zaiBody.tools) || zaiBody.tools.length === 0) { + if (!body._noToolInjection && (!Array.isArray(zaiBody.tools) || zaiBody.tools.length === 0)) { zaiBody.tools = STANDARD_TOOLS; logger.info({ injectedToolCount: STANDARD_TOOLS.length, diff --git a/src/clients/ollama-utils.js b/src/clients/ollama-utils.js index 7582f05..2cd95c9 100644 --- a/src/clients/ollama-utils.js +++ b/src/clients/ollama-utils.js @@ -93,6 +93,65 @@ function convertAnthropicToolsToOllama(anthropicTools) { })); } +/** + * Extract tool call from text when LLM outputs JSON instead of using tool_calls + * Handles formats like: {"name": "Read", "parameters": {...}} + * + * @param {string} text - Text content that may contain JSON tool call + * @returns {object|null} - Tool call object in Ollama format, or null if not found + */ +function extractToolCallFromText(text) { + if (!text || typeof text !== 'string') return null; + + // Find 
potential JSON start - look for {"name" pattern
+  const startMatch = text.match(/\{\s*"name"\s*:/);
+  if (!startMatch) return null;
+
+  const startIdx = startMatch.index;
+
+  // Find matching closing brace using brace counting
+  let braceCount = 0;
+  let endIdx = -1;
+  for (let i = startIdx; i < text.length; i++) {
+    if (text[i] === '{') braceCount++;
+    else if (text[i] === '}') {
+      braceCount--;
+      if (braceCount === 0) {
+        endIdx = i + 1;
+        break;
+      }
+    }
+  }
+
+  if (endIdx === -1) return null;
+
+  const jsonStr = text.substring(startIdx, endIdx);
+
+  try {
+    const parsed = JSON.parse(jsonStr);
+
+    if (!parsed.name || !parsed.parameters) {
+      return null;
+    }
+
+    logger.info({
+      toolName: parsed.name,
+      params: parsed.parameters,
+      originalText: text.substring(0, 200)
+    }, "Extracted tool call from text content (fallback parsing)");
+
+    return {
+      function: {
+        name: parsed.name,
+        arguments: parsed.parameters
+      }
+    };
+  } catch (e) {
+    logger.debug({ error: e.message, text: text.substring(0, 200) }, "Failed to parse extracted tool call");
+    return null;
+  }
+}
+
 /**
  * Convert Ollama tool call response to Anthropic format
  *
@@ -126,6 +185,15 @@ function convertOllamaToolCallsToAnthropic(ollamaResponse) {
-  const toolCalls = message.tool_calls || [];
+  let toolCalls = message.tool_calls || [];
   const textContent = message.content || "";

+  // FALLBACK: If no tool_calls but text contains JSON tool call, parse it
+  if (toolCalls.length === 0 && textContent) {
+    const extracted = extractToolCallFromText(textContent);
+    if (extracted) {
+      logger.info({ extractedTool: extracted.function?.name }, "Using fallback text parsing for tool call");
+      toolCalls = [extracted];
+    }
+  }
+
   const contentBlocks = [];

   // Add text content if present
@@ -217,4 +285,5 @@ module.exports = {
   convertOllamaToolCallsToAnthropic,
   buildAnthropicResponseFromOllama,
   modelNameSupportsTools,
+  extractToolCallFromText,
 };
diff --git a/src/clients/retry.js b/src/clients/retry.js
index 2178206..5d90654 100644
--- a/src/clients/retry.js
+++ b/src/clients/retry.js
@@ -10,7 +10,7 @@ const DEFAULT_CONFIG = {
   backoffMultiplier: 2,
   jitterFactor: 0.1, // 10% jitter
   retryableStatuses: [429, 500, 502, 503, 504],
-  retryableErrors: ['ECONNRESET', 'ETIMEDOUT', 'ENOTFOUND', 'ENETUNREACH'],
+  retryableErrors: ['ECONNRESET', 'ETIMEDOUT', 'ENOTFOUND', 'ENETUNREACH', 'ECONNREFUSED'],
 };

 /**
@@ -44,6 +44,11 @@ function isRetryable(error, response, config) {
     return true;
   }

+  // Check nested cause (Node undici wraps connection errors as TypeError)
+  if (error && error.cause?.code && config.retryableErrors.includes(error.cause.code)) {
+    return true;
+  }
+
   // Check for network errors
   if (error && (error.name === 'FetchError' || error.name === 'AbortError')) {
     return true;
diff --git a/src/clients/standard-tools.js b/src/clients/standard-tools.js
index 51e4163..6cfd833 100644
--- a/src/clients/standard-tools.js
+++ b/src/clients/standard-tools.js
@@ -24,13 +24,17 @@ const STANDARD_TOOLS = [
   },
   {
     name: "Read",
-    description: "Reads a file from the local filesystem. You can access any file directly by using this tool.",
+    description: "Reads a file from the local filesystem. You can access any file directly by using this tool.\n\nEXTERNAL FILE APPROVAL FLOW: When reading a file outside the workspace, the tool will return an [APPROVAL REQUIRED] message instead of the file content. When this happens you MUST: (1) Tell the user the file is outside the workspace and ask for permission. (2) If the user approves, call this tool again with the SAME file_path and set user_approved=true. 
(3) Only then will the file content be returned.", input_schema: { type: "object", properties: { file_path: { type: "string", - description: "Relative path within workspace (e.g., 'config.js', 'src/index.ts'). DO NOT use absolute paths." + description: "Path to the file. Use relative paths for workspace files (e.g., 'src/index.ts'). For files outside the workspace use absolute paths or ~ for the home directory (e.g., '~/Documents/notes.md', '/etc/hosts'). Each call reads ONE file only — do not pass multiple paths." + }, + user_approved: { + type: "boolean", + description: "Set to true ONLY after the user has explicitly approved reading a file outside the workspace. Never set this to true without asking the user first." }, limit: { type: "number", diff --git a/src/config/index.js b/src/config/index.js index 51cc548..9da383a 100644 --- a/src/config/index.js +++ b/src/config/index.js @@ -1,7 +1,9 @@ const path = require("path"); const dotenv = require("dotenv"); -dotenv.config(); +// .env must be authoritative over shell env vars (e.g. stale exports in .bashrc). +// Skip override in test mode so tests can set process.env before requiring config. +dotenv.config({ override: process.env.NODE_ENV !== "test" }); function trimTrailingSlash(value) { if (typeof value !== "string") return value; @@ -134,6 +136,14 @@ const zaiModel = process.env.ZAI_MODEL?.trim() || "GLM-4.7"; const vertexApiKey = process.env.VERTEX_API_KEY?.trim() || process.env.GOOGLE_API_KEY?.trim() || null; const vertexModel = process.env.VERTEX_MODEL?.trim() || "gemini-2.0-flash"; +// Suggestion mode model override +// Values: "default" (use MODEL_DEFAULT), "none" (skip LLM call), or a model name +const suggestionModeModel = (process.env.SUGGESTION_MODE_MODEL ?? "default").trim(); + +// Topic detection model override +// Values: "default" (use main model) or a model name to redirect topic detection to a lighter model +const topicDetectionModel = (process.env.TOPIC_DETECTION_MODEL ?? "default").trim(); + // Hot reload configuration const hotReloadEnabled = process.env.HOT_RELOAD_ENABLED !== "false"; // default true const hotReloadDebounceMs = Number.parseInt(process.env.HOT_RELOAD_DEBOUNCE_MS ?? "1000", 10); @@ -170,6 +180,13 @@ if (!["server", "client", "passthrough"].includes(toolExecutionMode)) { "TOOL_EXECUTION_MODE must be one of: server, client, passthrough (default: server)" ); } +console.log(`[CONFIG] Tool execution mode: ${toolExecutionMode}`); +if (suggestionModeModel.toLowerCase() !== "default") { + console.log(`[CONFIG] Suggestion mode model: ${suggestionModeModel}`); +} +if (topicDetectionModel.toLowerCase() !== "default") { + console.log(`[CONFIG] Topic detection model: ${topicDetectionModel}`); +} // Memory system configuration (Titans-inspired long-term memory) const memoryEnabled = process.env.MEMORY_ENABLED !== "false"; // default true @@ -342,6 +359,8 @@ const databricksUrl = ? `${rawBaseUrl}${endpointPath.startsWith("/") ? "" : "/"}${endpointPath}` : null; +// Set MODEL_DEFAULT env var to use a specific model (e.g. "llama3.1" for Ollama). +// Without it, the default falls back to a Databricks Claude model regardless of MODEL_PROVIDER. const defaultModel = process.env.MODEL_DEFAULT ?? (modelProvider === "azure-anthropic" ? 
"claude-opus-4-5" : "databricks-claude-sonnet-4-5"); @@ -592,6 +611,8 @@ var config = { modelProvider: { type: modelProvider, defaultModel, + suggestionModeModel, + topicDetectionModel, // Hybrid routing settings preferOllama, fallbackEnabled, @@ -881,6 +902,8 @@ function reloadConfig() { config.modelProvider.preferOllama = process.env.PREFER_OLLAMA === "true"; config.modelProvider.fallbackEnabled = process.env.FALLBACK_ENABLED !== "false"; config.modelProvider.fallbackProvider = (process.env.FALLBACK_PROVIDER ?? "databricks").toLowerCase(); + config.modelProvider.suggestionModeModel = (process.env.SUGGESTION_MODE_MODEL ?? "default").trim(); + config.modelProvider.topicDetectionModel = (process.env.TOPIC_DETECTION_MODEL ?? "default").trim(); // Log level config.logger.level = process.env.LOG_LEVEL ?? "info"; diff --git a/src/context/compression.js b/src/context/compression.js index 518aaba..47b0413 100644 --- a/src/context/compression.js +++ b/src/context/compression.js @@ -2,24 +2,63 @@ * History Compression for Token Optimization * * Compresses conversation history to reduce token usage while - * maintaining context quality. Uses sliding window approach: - * - Keep recent turns verbatim - * - Summarize older turns - * - Compress tool results + * maintaining context quality. Uses sliding window approach with + * percentage-based tiered compression that scales with recency + * and the model's context window size. * + * Tiers: + * - veryRecent (last 4 messages): keep 90% of content + * - recent (messages 5-10): keep 50% of content + * - old (11+): keep 20% of content */ const logger = require('../logger'); const config = require('../config'); +// Compression tiers: ratio = percentage of content to keep, minFloor = minimum chars +const COMPRESSION_TIERS = { + veryRecent: { ratio: 0.9, minFloor: 500 }, + recent: { ratio: 0.5, minFloor: 300 }, + old: { ratio: 0.2, minFloor: 200 }, +}; + +// How many of the recent messages count as "very recent" +const VERY_RECENT_COUNT = 4; + /** - * Compress conversation history to fit within token budget + * Compute the maximum character cap for a tier based on context window size. + * + * @param {number} contextWindowTokens - Model's context window in tokens (-1 = unknown) + * @param {string} tierName - "veryRecent", "recent", or "old" + * @returns {number} Maximum characters for tool result content in this tier + */ +function computeMaxCap(contextWindowTokens, tierName) { + // Convert tokens to chars (~4 chars/token), default to 8K tokens if unknown + const contextChars = (contextWindowTokens === -1 ? 8000 : contextWindowTokens) * 4; + const budgetRatios = { + veryRecent: 0.25, + recent: 0.10, + old: 0.03, + }; + return Math.floor(contextChars * (budgetRatios[tierName] ?? 0.03)); +} + +/** + * Compute the character limit for a piece of content based on tier and context window. * - * Strategy: - * 1. Keep last N turns verbatim (fresh context) - * 2. Summarize older turns (compressed history) - * 3. Compress tool results to key information only - * 4. 
Remove redundant exchanges + * @param {string} text - The text content + * @param {string} tierName - Tier name + * @param {number} contextWindowTokens - Context window in tokens + * @returns {number} Character limit + */ +function computeLimit(text, tierName, contextWindowTokens) { + const tier = COMPRESSION_TIERS[tierName] || COMPRESSION_TIERS.old; + const maxCap = computeMaxCap(contextWindowTokens, tierName); + return Math.min(maxCap, Math.max(tier.minFloor, Math.floor(text.length * tier.ratio))); +} + +/** + * Compress conversation history to fit within token budget * * @param {Array} messages - Conversation history * @param {Object} options - Compression options @@ -28,6 +67,8 @@ const config = require('../config'); function compressHistory(messages, options = {}) { if (!messages || messages.length === 0) return messages; + const contextWindowTokens = options.contextWindowTokens ?? -1; + const opts = { keepRecentTurns: options.keepRecentTurns ?? config.historyCompression?.keepRecentTurns ?? 10, summarizeOlder: options.summarizeOlder ?? config.historyCompression?.summarizeOlder ?? true, @@ -58,12 +99,16 @@ function compressHistory(messages, options = {}) { compressed.push(summary); } } else { - // Just compress tool results in old messages - compressed = oldMessages.map(msg => compressMessage(msg)); + // Compress tool results in old messages using "old" tier + compressed = oldMessages.map(msg => compressMessage(msg, "old", contextWindowTokens)); } - // Add recent messages (may compress tool results but keep content) - const recentCompressed = recentMessages.map(msg => compressToolResults(msg)); + // Add recent messages with tiered compression + const recentCompressed = recentMessages.map((msg, i) => { + const isVeryRecent = i >= recentMessages.length - VERY_RECENT_COUNT; + const tierName = isVeryRecent ? "veryRecent" : "recent"; + return compressToolResults(msg, tierName, contextWindowTokens); + }); const finalMessages = [...compressed, ...recentCompressed]; @@ -82,7 +127,8 @@ function compressHistory(messages, options = {}) { percentage: ((saved / originalLength) * 100).toFixed(1), splitIndex, oldMessages: oldMessages.length, - recentMessages: recentMessages.length + recentMessages: recentMessages.length, + contextWindowTokens, }, 'History compression applied'); } @@ -149,26 +195,28 @@ function summarizeOldHistory(messages) { } /** - * Compress a single message - * - * Reduces message size while preserving essential information. 
+ * Compress a single message (used for old messages outside the recent window) * * @param {Object} message - Message to compress + * @param {string} tierName - Compression tier + * @param {number} contextWindowTokens - Context window in tokens * @returns {Object} Compressed message */ -function compressMessage(message) { +function compressMessage(message, tierName = "old", contextWindowTokens = -1) { if (!message) return message; + const limit = computeLimit("x".repeat(300), tierName, contextWindowTokens); + const compressed = { role: message.role }; // Compress content based on type if (typeof message.content === 'string') { - compressed.content = compressText(message.content, 300); + compressed.content = compressText(message.content, limit); } else if (Array.isArray(message.content)) { compressed.content = message.content - .map(block => compressContentBlock(block)) + .map(block => compressContentBlock(block, tierName, contextWindowTokens)) .filter(Boolean); } else { compressed.content = message.content; @@ -180,13 +228,12 @@ function compressMessage(message) { /** * Compress tool results in a message while keeping other content * - * Tool results can be very large. This compresses them while - * keeping user and assistant text intact. - * * @param {Object} message - Message to process + * @param {string} tierName - Compression tier + * @param {number} contextWindowTokens - Context window in tokens * @returns {Object} Message with compressed tool results */ -function compressToolResults(message) { +function compressToolResults(message, tierName = "recent", contextWindowTokens = -1) { if (!message) return message; const compressed = { @@ -199,7 +246,7 @@ function compressToolResults(message) { compressed.content = message.content.map(block => { // Compress tool_result blocks if (block.type === 'tool_result') { - return compressToolResultBlock(block); + return compressToolResultBlock(block, tierName, contextWindowTokens); } // Keep other blocks as-is return block; @@ -215,16 +262,20 @@ function compressToolResults(message) { * Compress a content block * * @param {Object} block - Content block + * @param {string} tierName - Compression tier + * @param {number} contextWindowTokens - Context window in tokens * @returns {Object|null} Compressed block or null if removed */ -function compressContentBlock(block) { +function compressContentBlock(block, tierName = "old", contextWindowTokens = -1) { if (!block) return null; + const limit = computeLimit("x".repeat(300), tierName, contextWindowTokens); + switch (block.type) { case 'text': return { type: 'text', - text: compressText(block.text, 300) + text: compressText(block.text, limit) }; case 'tool_use': @@ -237,7 +288,7 @@ function compressContentBlock(block) { }; case 'tool_result': - return compressToolResultBlock(block); + return compressToolResultBlock(block, tierName, contextWindowTokens); default: return block; @@ -247,13 +298,15 @@ function compressContentBlock(block) { /** * Compress tool result block * - * Tool results can be very large (file contents, bash output). - * Compress while preserving essential information. + * Uses dynamic limits based on compression tier and context window size + * instead of a hardcoded character limit. 
 *
 * @param {Object} block - tool_result block
+ * @param {string} tierName - Compression tier
+ * @param {number} contextWindowTokens - Context window in tokens
 * @returns {Object} Compressed tool_result
 */
-function compressToolResultBlock(block) {
+function compressToolResultBlock(block, tierName = "old", contextWindowTokens = -1) {
   if (!block || block.type !== 'tool_result') return block;

   const compressed = {
@@ -261,17 +314,20 @@
     tool_use_id: block.tool_use_id,
   };

-  // Compress content
+  // Compress content using dynamic limits
   if (typeof block.content === 'string') {
-    compressed.content = compressText(block.content, 500);
+    const limit = computeLimit(block.content, tierName, contextWindowTokens);
+    compressed.content = compressText(block.content, limit);
   } else if (Array.isArray(block.content)) {
     compressed.content = block.content.map(item => {
       if (typeof item === 'string') {
-        return compressText(item, 500);
+        const limit = computeLimit(item, tierName, contextWindowTokens);
+        return compressText(item, limit);
       } else if (item.type === 'text') {
+        const limit = computeLimit(item.text || "", tierName, contextWindowTokens);
         return {
           type: 'text',
-          text: compressText(item.text, 500)
+          text: compressText(item.text, limit)
         };
       }
       return item;
@@ -456,4 +512,6 @@
   calculateCompressionStats,
   needsCompression,
   summarizeOldHistory,
+  COMPRESSION_TIERS,
+  computeMaxCap,
 };
diff --git a/src/orchestrator/index.js b/src/orchestrator/index.js
index d553b69..3e07ac2 100644
--- a/src/orchestrator/index.js
+++ b/src/orchestrator/index.js
@@ -10,6 +10,7 @@ const tokens = require("../utils/tokens");
 const systemPrompt = require("../prompts/system");
 const historyCompression = require("../context/compression");
 const tokenBudget = require("../context/budget");
+const { getContextWindow } = require("../providers/context-window");
 const { classifyRequestType, selectToolsSmartly } = require("../tools/smart-selection");
 const { compressMessages: headroomCompress, isEnabled: isHeadroomEnabled } = require("../headroom");
 const { createAuditLogger } = require("../logger/audit-logger");
@@ -669,53 +670,11 @@ function normaliseToolChoice(choice) {
 }

 /**
- * Strip thinking-style reasoning from Ollama model outputs
- * Patterns to remove:
- * - Lines starting with bullet points (●, •, -, *)
- * - Explanatory reasoning before the actual response
- * - Multiple newlines used to separate thinking from response
+ * Strip <think>...</think> tags that some models (DeepSeek, Qwen) emit for chain-of-thought reasoning.
 */
-function stripThinkingBlocks(text) {
+function stripThinkTags(text) {
   if (typeof text !== "string") return text;
-
-  // Split into lines
-  const lines = text.split("\n");
-  const cleanedLines = [];
-  let inThinkingBlock = false;
-  let consecutiveEmptyLines = 0;
-
-  for (const line of lines) {
-    const trimmed = line.trim();
-
-    // Detect thinking block markers (bullet points followed by reasoning)
-    if (/^[●•\-\*]\s/.test(trimmed)) {
-      inThinkingBlock = true;
-      continue;
-    }
-
-    // Empty lines might separate thinking from response
-    if (trimmed === "") {
-      consecutiveEmptyLines++;
-      // If we've seen 2+ empty lines, likely end of thinking block
-      if (consecutiveEmptyLines >= 2) {
-        inThinkingBlock = false;
-      }
-      continue;
-    }
-
-    // Reset empty line counter
-    consecutiveEmptyLines = 0;
-
-    // Skip lines that are part of thinking block
-    if (inThinkingBlock) {
-      continue;
-    }
-
-    // Keep this line
-    cleanedLines.push(line);
-  }
-
-  return cleanedLines.join("\n").trim();
+  return text.replace(/<think>[\s\S]*?<\/think>/g, "").trim();
 }

 function ollamaToAnthropicResponse(ollamaResponse, requestedModel) {
@@ -732,7 +691,7 @@
   // Add text content if present, after stripping thinking blocks
   if (typeof rawContent === "string" && rawContent.trim()) {
-    const cleanedContent = stripThinkingBlocks(rawContent);
+    const cleanedContent = stripThinkTags(rawContent);
     if (cleanedContent) {
       contentItems.push({ type: "text", text: cleanedContent });
     }
@@ -919,6 +878,10 @@ function sanitizePayload(payload) {
       : "claude-opus-4-5";
     clean.model = azureDefaultModel;
   } else if (providerType === "ollama") {
+    // Override client model with Ollama config model
+    const ollamaConfiguredModel = config.ollama?.model;
+    clean.model = ollamaConfiguredModel;
+
     // Ollama format conversion
     // Check if model supports tools
     const { modelNameSupportsTools } = require("../clients/ollama-utils");
@@ -1024,8 +987,15 @@
     }

     // Very short messages (< 20 chars) without code/technical keywords
+    // BUT: Common shell commands should NOT be treated as conversational
+    const shellCommands = /^(pwd|ls|cd|cat|echo|grep|find|ps|top|df|du|whoami|which|env)[\s\.\!\?]*$/;
+    if (shellCommands.test(trimmed)) {
+      logger.info({ matched: "shell_command", trimmed }, "Ollama conversational check - SHELL COMMAND detected, keeping tools");
+      return false; // NOT conversational - needs tools!
+    }
+
     if (trimmed.length < 20 && !/code|file|function|error|bug|fix|write|read|create/.test(trimmed)) {
-      logger.debug({ matched: "short", trimmed, length: trimmed.length }, "Ollama conversational check - matched");
+      logger.warn({ matched: "short", trimmed, length: trimmed.length }, "Ollama conversational check - SHORT MESSAGE matched, DELETING TOOLS");
       return true;
     }
@@ -1035,13 +1005,16 @@
   if (isConversational) {
     // Strip all tools for simple conversational messages
+    const originalToolCount = Array.isArray(clean.tools) ? 
clean.tools.length : 0; delete clean.tools; delete clean.tool_choice; - logger.debug({ + clean._noToolInjection = true; + logger.warn({ model: config.ollama?.model, - message: "Removed tools for conversational message" - }, "Ollama conversational mode"); - } else if (modelSupportsTools && Array.isArray(clean.tools) && clean.tools.length > 0) { + message: "Removed tools for conversational message", + originalToolCount, + userMessage: clean.messages?.[clean.messages.length - 1]?.content?.substring(0, 50), + }, "Ollama conversational mode - ALL TOOLS DELETED!"); } else if (modelSupportsTools && Array.isArray(clean.tools) && clean.tools.length > 0) { // Ollama performance degrades with too many tools // Limit to essential tools only const OLLAMA_ESSENTIAL_TOOLS = new Set([ @@ -1052,7 +1025,8 @@ function sanitizePayload(payload) { "Glob", "Grep", "WebSearch", - "WebFetch" + "WebFetch", + "shell", // Tool is registered as "shell" internally ]); const limitedTools = clean.tools.filter(tool => @@ -1140,6 +1114,9 @@ function sanitizePayload(payload) { } clean.tools = selectedTools.length > 0 ? selectedTools : undefined; + if (!selectedTools.length) { + clean._noToolInjection = true; + } } clean.stream = payload.stream ?? false; @@ -1234,6 +1211,28 @@ function sanitizePayload(payload) { toolCount: clean.tools?.length ?? 0 }, '[CONTEXT_FLOW] After sanitizePayload'); + // === Suggestion mode: tag request and override model if configured === + const { isSuggestionMode: isSuggestion } = detectSuggestionMode(clean.messages); + clean._requestMode = isSuggestion ? "suggestion" : "main"; + const smConfig = config.modelProvider?.suggestionModeModel ?? "default"; + if (isSuggestion && smConfig.toLowerCase() !== "default" && smConfig.toLowerCase() !== "none") { + clean.model = smConfig; + clean._suggestionModeModel = smConfig; + } + + // === Topic detection: tag request and override model if configured === + if (clean._requestMode === "main") { + const { isTopicDetection: isTopic } = detectTopicDetection(clean); + if (isTopic) { + clean._requestMode = "topic"; + const tdConfig = config.modelProvider?.topicDetectionModel ?? 
"default"; + if (tdConfig.toLowerCase() !== "default") { + clean.model = tdConfig; + clean._topicDetectionModel = tdConfig; + } + } + } + return clean; } @@ -1330,9 +1329,12 @@ async function runAgentLoop({ providerType, headers, }) { - console.log('[DEBUG] runAgentLoop ENTERED - providerType:', providerType, 'messages:', cleanPayload.messages?.length); + console.log('[DEBUG] runAgentLoop ENTERED - providerType:', providerType, 'messages:', cleanPayload.messages?.length, 'mode:', cleanPayload._requestMode || 'main', 'model:', cleanPayload.model); logger.info({ providerType, messageCount: cleanPayload.messages?.length }, 'runAgentLoop ENTERED'); const settings = resolveLoopOptions(options); + // Detect context window size for intelligent compression + const contextWindowTokens = await getContextWindow(); + console.log('[DEBUG] Context window detected:', contextWindowTokens, 'tokens for provider:', providerType); // Initialize audit logger (no-op if disabled) const auditLogger = createAuditLogger(config.audit); const start = Date.now(); @@ -1340,8 +1342,22 @@ async function runAgentLoop({ let toolCallsExecuted = 0; let fallbackPerformed = false; const toolCallNames = new Map(); - const toolCallHistory = new Map(); // Track tool calls to detect loops: signature -> count + const toolCallHistory = new Map(); // Track tool calls to detect loops: signature -> counta let loopWarningInjected = false; // Track if we've already warned about loops + let emptyResponseRetried = false; // Track if we've retried after an empty LLM response + + // Log agent loop start + logger.info( + { + sessionId: session?.id ?? null, + model: requestedModel, + maxSteps: settings.maxSteps, + maxDurationMs: settings.maxDurationMs, + wantsThinking, + providerType, + }, + "Agent loop started", + ); while (steps < settings.maxSteps) { if (Date.now() - start > settings.maxDurationMs) { @@ -1378,7 +1394,6 @@ async function runAgentLoop({ } steps += 1; - console.log('[LOOP DEBUG] Entered while loop - step:', steps); logger.debug( { sessionId: session?.id ?? null, @@ -1409,7 +1424,8 @@ async function runAgentLoop({ cleanPayload.messages = historyCompression.compressHistory(originalMessages, { keepRecentTurns: config.historyCompression?.keepRecentTurns ?? 10, summarizeOlder: config.historyCompression?.summarizeOlder ?? true, - enabled: true + enabled: true, + contextWindowTokens, }); if (cleanPayload.messages !== originalMessages) { @@ -1694,8 +1710,88 @@ IMPORTANT TOOL USAGE RULES: }); } - const databricksResponse = await invokeModel(cleanPayload); + // === DEBUG: Log request to LLM === + console.log('\n[LLM REQUEST]', new Date().toISOString(), 'step:', steps, 'model:', cleanPayload.model, 'provider:', providerType, 'mode:', cleanPayload._requestMode || 'main'); + console.log('[LLM REQUEST] messages (' + (cleanPayload.messages?.length ?? 0) + '):'); + for (const m of (cleanPayload.messages || [])) { + const preview = typeof m.content === 'string' + ? m.content.substring(0, 200) + : Array.isArray(m.content) + ? 
m.content.map(b => b.type + ':' + (b.text || b.name || b.tool_use_id || '').substring(0, 80)).join(' | ') + : JSON.stringify(m.content).substring(0, 200); + console.log(' [' + m.role + '] ' + preview); + } + console.log('[LLM REQUEST] tools:', (cleanPayload.tools || []).map(t => t.name || t.function?.name).join(', ') || '(none)'); + console.log('[LLM REQUEST] _noToolInjection:', !!cleanPayload._noToolInjection); + + let databricksResponse; + try { + databricksResponse = await invokeModel(cleanPayload); + } catch (modelError) { + const isConnectionError = modelError.cause?.code === 'ECONNREFUSED' + || modelError.message?.includes('fetch failed') + || modelError.code === 'ECONNREFUSED'; + if (isConnectionError) { + console.error(`[LLM ERROR] ${new Date().toISOString()} Provider ${providerType} is unreachable (connection refused). Is it running?`); + return { + response: { + status: 503, + body: { + error: { + type: "provider_unreachable", + message: `Provider ${providerType} is unreachable. Is the service running?`, + }, + }, + terminationReason: "provider_unreachable", + }, + steps, + durationMs: Date.now() - start, + terminationReason: "provider_unreachable", + }; + } + throw modelError; + } + // === DEBUG: Log response from LLM === + console.log('\n[LLM RESPONSE]', new Date().toISOString(), 'ok:', databricksResponse.ok, 'status:', databricksResponse.status, 'stream:', !!databricksResponse.stream, 'mode:', cleanPayload._requestMode || 'main'); + if (databricksResponse.json) { + const rj = databricksResponse.json; + // Anthropic format + if (rj.content) { + console.log('[LLM RESPONSE] Anthropic format - content blocks:', Array.isArray(rj.content) ? rj.content.length : typeof rj.content); + if (Array.isArray(rj.content)) { + for (const b of rj.content) { + if (b.type === 'text') console.log(' [text] ' + (b.text || '').substring(0, 300)); + else if (b.type === 'tool_use') console.log(' [tool_use] ' + b.name + '(' + JSON.stringify(b.input).substring(0, 200) + ')'); + else console.log(' [' + b.type + ']'); + } + } + console.log('[LLM RESPONSE] stop_reason:', rj.stop_reason); + } + // OpenAI format + if (rj.choices) { + const msg = rj.choices[0]?.message; + console.log('[LLM RESPONSE] OpenAI format - finish_reason:', rj.choices[0]?.finish_reason); + console.log(' [content] ' + (msg?.content || '(null)')); + if (msg?.tool_calls?.length) { + for (const tc of msg.tool_calls) { + console.log(' [tool_call] ' + (tc.function?.name || tc.name) + '(' + (tc.function?.arguments || '') + ')'); + } + } + } + // Ollama format + if (rj.message && !rj.choices && !rj.content) { + console.log('[LLM RESPONSE] Ollama format - done:', rj.done); + console.log(' [content] ' + (rj.message.content || '(empty)')); + if (rj.message.tool_calls?.length) { + for (const tc of rj.message.tool_calls) { + console.log(' [tool_call] ' + (tc.function?.name || tc.name) + '(' + JSON.stringify(tc.function?.arguments || {}) + ')'); + } + } + } + } else { + console.log('[LLM RESPONSE] no json body - raw:', String(databricksResponse.body || '')); + } // Extract and log actual token usage const actualUsage = databricksResponse.ok && config.tokenTracking?.enabled !== false ? tokens.extractUsageFromResponse(databricksResponse.json) @@ -1761,6 +1857,15 @@ IMPORTANT TOOL USAGE RULES: }); } } + logger.info({ + messageContent: databricksResponse.json?.message?.content + ? (typeof databricksResponse.json.message.content === 'string' + ? 
databricksResponse.json.message.content.substring(0, 500) + : JSON.stringify(databricksResponse.json.message.content).substring(0, 500)) + : 'NO_CONTENT', + hasToolCalls: !!databricksResponse.json?.message?.tool_calls, + toolCallCount: databricksResponse.json?.message?.tool_calls?.length || 0 + }, "=== RAW LLM RESPONSE CONTENT ==="); // Handle streaming responses (pass through without buffering) if (databricksResponse.stream) { @@ -1860,11 +1965,13 @@ IMPORTANT TOOL USAGE RULES: _anthropic_block: block, })); - logger.debug( + logger.info( { sessionId: session?.id ?? null, + step: steps, contentBlocks: contentArray.length, toolCallsFound: toolCalls.length, + toolNames: toolCalls.map(tc => tc.function?.name || tc.name), stopReason: databricksResponse.json?.stop_reason, }, "Azure Anthropic response parsed", @@ -1874,13 +1981,182 @@ IMPORTANT TOOL USAGE RULES: const choice = databricksResponse.json?.choices?.[0]; message = choice?.message ?? {}; toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : []; + + // Deduplicate tool calls for OpenAI format too + if (toolCalls.length > 0) { + const uniqueToolCalls = []; + const seenSignatures = new Set(); + let duplicatesRemoved = 0; + + for (const call of toolCalls) { + const signature = getToolCallSignature(call); + if (!seenSignatures.has(signature)) { + seenSignatures.add(signature); + uniqueToolCalls.push(call); + } else { + duplicatesRemoved++; + logger.warn({ + sessionId: session?.id ?? null, + toolName: call.function?.name || call.name, + toolId: call.id, + signature: signature.substring(0, 32), + }, "Duplicate tool call removed (same tool with identical parameters in single response)"); + } + } + + toolCalls = uniqueToolCalls; + + logger.info( + { + sessionId: session?.id ?? null, + step: steps, + toolCallsFound: toolCalls.length, + duplicatesRemoved, + toolNames: toolCalls.map(tc => tc.function?.name || tc.name), + }, + "LLM Response: Tool calls requested (after deduplication)", + ); + } else if (providerType === "ollama") { + // Ollama format: { message: { role, content, tool_calls }, done } + message = databricksResponse.json?.message ?? {}; + toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : []; + + logger.info({ + hasMessage: !!databricksResponse.json?.message, + hasToolCalls: toolCalls.length > 0, + toolCallCount: toolCalls.length, + toolNames: toolCalls.map(tc => tc.function?.name), + done: databricksResponse.json?.done, + fullToolCalls: JSON.stringify(toolCalls), + fullResponseMessage: JSON.stringify(databricksResponse.json?.message) + }, "=== OLLAMA TOOL CALLS EXTRACTION ==="); + } else { + // OpenAI/Databricks format: { choices: [{ message: { tool_calls: [...] } }] } + const choice = databricksResponse.json?.choices?.[0]; + message = choice?.message ?? {}; + toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : []; + + // Deduplicate tool calls for OpenAI format too + if (toolCalls.length > 0) { + const uniqueToolCalls = []; + const seenSignatures = new Set(); + let duplicatesRemoved = 0; + + for (const call of toolCalls) { + const signature = getToolCallSignature(call); + + if (!seenSignatures.has(signature)) { + seenSignatures.add(signature); + uniqueToolCalls.push(call); + } else { + duplicatesRemoved++; + logger.warn({ + sessionId: session?.id ?? 
null, + toolName: call.function?.name || call.name, + toolId: call.id, + signature: signature.substring(0, 32), + }, "Duplicate tool call removed (same tool with identical parameters in single response)"); + } + } + + toolCalls = uniqueToolCalls; + + logger.info( + { + sessionId: session?.id ?? null, + step: steps, + toolCallsFound: toolCalls.length, + duplicatesRemoved, + toolNames: toolCalls.map(tc => tc.function?.name || tc.name), + }, + "LLM Response: Tool calls requested (after deduplication)", + ); + } + } + } + + // === EMPTY RESPONSE DETECTION (primary) === + // Check raw extracted message for empty content before tool handling or conversion + const rawTextContent = (() => { + if (typeof message.content === 'string') return message.content.trim(); + if (Array.isArray(message.content)) { + return message.content + .filter(b => b.type === 'text') + .map(b => b.text || '') + .join('') + .trim(); + } + return ''; + })(); + + if (toolCalls.length === 0 && !rawTextContent) { + console.log('[EMPTY RESPONSE] No text content and no tool calls - step:', steps, 'retried:', emptyResponseRetried); + logger.warn({ + sessionId: session?.id ?? null, + step: steps, + messageKeys: Object.keys(message), + contentType: typeof message.content, + rawContentPreview: String(message.content || '').substring(0, 100), + }, "Empty LLM response detected (no text, no tool calls)"); + + // Retry once with a nudge + if (steps < settings.maxSteps && !emptyResponseRetried) { + emptyResponseRetried = true; + cleanPayload.messages.push({ + role: "assistant", + content: "", + }); + cleanPayload.messages.push({ + role: "user", + content: "Please provide a response to the user's message.", + }); + logger.info({ sessionId: session?.id ?? null }, "Retrying after empty response with nudge"); + continue; + } + + // Fallback after retry also returned empty + logger.warn({ sessionId: session?.id ?? null, steps }, "Empty response persisted after retry"); + return { + response: { + status: 200, + body: { + id: `msg_${Date.now()}`, + type: "message", + role: "assistant", + model: requestedModel, + content: [{ type: "text", text: "I wasn't able to generate a response. Could you try rephrasing your message?" }], + stop_reason: "end_turn", + usage: { input_tokens: 0, output_tokens: 0 }, + }, + terminationReason: "empty_response_fallback", + }, + steps, + durationMs: Date.now() - start, + terminationReason: "empty_response_fallback", + }; + } + + // Guard: drop hallucinated tool calls when no tools were sent to the model. + // Some models (e.g. Llama 3.1) hallucinate tool_call blocks from conversation + // history even when the request contained zero tool definitions. + const toolsWereSent = Array.isArray(cleanPayload.tools) && cleanPayload.tools.length > 0; + if (toolCalls.length > 0 && !toolsWereSent) { + console.log('[HALLUCINATION GUARD] Model returned', toolCalls.length, 'tool call(s) but no tools were offered — ignoring:', toolCalls.map(tc => tc.function?.name || tc.name)); + logger.warn({ + sessionId: session?.id ?? 
null, + step: steps, + hallucinated: toolCalls.map(tc => tc.function?.name || tc.name), + noToolInjection: !!cleanPayload._noToolInjection, + }, "Dropped hallucinated tool calls (no tools were sent to model)"); + toolCalls = []; + // If there's also no text content, treat as empty response (handled below) } if (toolCalls.length > 0) { // Convert OpenAI/OpenRouter format to Anthropic format for session storage let sessionContent; if (providerType === "azure-anthropic") { - // Azure Anthropic already returns content in Anthropic format + // Azure Anthropic already returns content in Anthropic sessionContent = databricksResponse.json?.content ?? []; } else { // Convert OpenAI/OpenRouter format to Anthropic content blocks @@ -2129,6 +2405,7 @@ IMPORTANT TOOL USAGE RULES: session, cwd, requestMessages: cleanPayload.messages, + providerType, })) ); @@ -2175,6 +2452,15 @@ IMPORTANT TOOL USAGE RULES: cleanPayload.messages.push(toolMessage); + logger.info( + { + toolName: execution.name, + content: typeof toolMessage.content === 'string' + ? toolMessage.content.substring(0, 500) + : JSON.stringify(toolMessage.content).substring(0, 500) + }, "Tool result content sent to LLM", + ); + // Convert to Anthropic format for session storage let sessionToolResultContent; if (providerType === "azure-anthropic") { @@ -2362,8 +2648,18 @@ IMPORTANT TOOL USAGE RULES: session, cwd, requestMessages: cleanPayload.messages, + providerType, }); + logger.debug( + { + id: execution.id ?? null, + name: execution.name ?? null, + arguments: execution.arguments ?? null, + content: execution.content ?? null, + is_error: execution.ok === false, + }, "executeToolCall response" ); + let toolMessage; if (providerType === "azure-anthropic") { const parsedContent = parseExecutionContent(execution.content); @@ -2562,7 +2858,16 @@ IMPORTANT TOOL USAGE RULES: } } - continue; + logger.info({ + sessionId: session?.id ?? null, + step: steps, + toolCallsExecuted: toolCallsExecuted, + totalToolCallsInThisStep: toolCalls.length, + messageCount: cleanPayload.messages.length, + lastMessageRole: cleanPayload.messages[cleanPayload.messages.length - 1]?.role, + }, "Tool execution complete"); + + continue; // Loop back to invoke model with tool results in context } let anthropicPayload; @@ -2824,6 +3129,68 @@ IMPORTANT TOOL USAGE RULES: anthropicPayload.content = policy.sanitiseContent(anthropicPayload.content); } + // === EMPTY RESPONSE DETECTION (safety net — post-conversion) === + // Primary detection is earlier (before tool handling). This catches edge cases + // where conversion produces empty content from non-empty raw data. + const hasTextContent = (() => { + if (Array.isArray(anthropicPayload.content)) { + return anthropicPayload.content.some(b => b.type === "text" && b.text?.trim()); + } + if (typeof anthropicPayload.content === "string") { + return anthropicPayload.content.trim().length > 0; + } + return false; + })(); + + const hasToolUseBlocks = Array.isArray(anthropicPayload.content) && + anthropicPayload.content.some(b => b.type === "tool_use"); + + if (!hasToolUseBlocks && !hasTextContent) { + logger.warn({ + sessionId: session?.id ?? null, + step: steps, + messageKeys: Object.keys(anthropicPayload), + contentType: typeof anthropicPayload.content, + contentLength: Array.isArray(anthropicPayload.content) ? 
anthropicPayload.content.length : String(anthropicPayload.content || "").length, + }, "Empty LLM response detected (no text, no tool calls)"); + + // Retry once with a nudge + if (steps < settings.maxSteps && !emptyResponseRetried) { + emptyResponseRetried = true; + cleanPayload.messages.push({ + role: "assistant", + content: "", + }); + cleanPayload.messages.push({ + role: "user", + content: "Please provide a response to the user's message.", + }); + logger.info({ sessionId: session?.id ?? null }, "Retrying after empty response with nudge"); + continue; // Go back to top of while loop + } + + // If retry also returned empty, return a fallback message + logger.warn({ sessionId: session?.id ?? null, steps }, "Empty response persisted after retry"); + return { + response: { + status: 200, + body: { + id: `msg_${Date.now()}`, + type: "message", + role: "assistant", + model: requestedModel, + content: [{ type: "text", text: "I wasn't able to generate a response. Could you try rephrasing your message?" }], + stop_reason: "end_turn", + usage: { input_tokens: 0, output_tokens: 0 }, + }, + terminationReason: "empty_response_fallback", + }, + steps, + durationMs: Date.now() - start, + terminationReason: "empty_response_fallback", + }; + } + // Ensure content is an array before calling .find() const content = Array.isArray(anthropicPayload.content) ? anthropicPayload.content : []; const fallbackCandidate = content.find( @@ -3009,6 +3376,7 @@ IMPORTANT TOOL USAGE RULES: session, cwd, requestMessages: cleanPayload.messages, + providerType, }); const toolResultMessage = createFallbackToolResultMessage(providerType, { @@ -3151,6 +3519,18 @@ IMPORTANT TOOL USAGE RULES: }, "Agent loop completed successfully", ); + + // DIAGNOSTIC: Log response being returned + logger.info({ + sessionId: session?.id ?? null, + status: 200, + hasBody: !!anthropicPayload, + bodyKeys: anthropicPayload ? Object.keys(anthropicPayload) : [], + contentType: anthropicPayload?.content ? (Array.isArray(anthropicPayload.content) ? 'array' : typeof anthropicPayload.content) : 'none', + contentLength: anthropicPayload?.content ? (Array.isArray(anthropicPayload.content) ? anthropicPayload.content.length : String(anthropicPayload.content).length) : 0, + stopReason: anthropicPayload?.stop_reason + }, "=== RETURNING RESPONSE TO CLIENT ==="); + return { response: { status: 200, @@ -3217,6 +3597,114 @@ IMPORTANT TOOL USAGE RULES: }; } +/** + * Detect if the current request is a suggestion mode call. + * Scans the last user message for the [SUGGESTION MODE: marker. + * @param {Array} messages - The conversation messages + * @returns {{ isSuggestionMode: boolean }} + */ +function detectSuggestionMode(messages) { + if (!Array.isArray(messages) || messages.length === 0) { + return { isSuggestionMode: false }; + } + // Scan from the end to find the last user message + for (let i = messages.length - 1; i >= 0; i--) { + const msg = messages[i]; + if (msg?.role !== 'user') continue; + const content = typeof msg.content === 'string' + ? msg.content + : Array.isArray(msg.content) + ? msg.content.map(b => b.text || '').join(' ') + : ''; + if (content.includes('[SUGGESTION MODE:')) { + return { isSuggestionMode: true }; + } + // Only check the last user message + break; + } + return { isSuggestionMode: false }; +} + +/** + * Detect if the current request is a topic detection/classification call. + * These requests typically have a system prompt asking to classify conversation + * topics, with no tools and very short messages. 
+ * They waste GPU time on large models (30-90s just to classify a topic).
+ *
+ * Detection heuristics:
+ * 1. System prompt contains topic classification instructions
+ * 2. No tools in the payload (topic detection never needs tools)
+ * 3. Short message count (typically 1-3 messages)
+ *
+ * @param {Object} payload - The request payload
+ * @returns {{ isTopicDetection: boolean }}
+ */
+function detectTopicDetection(payload) {
+  if (!payload) return { isTopicDetection: false };
+
+  // Topic detection requests have no tools
+  if (Array.isArray(payload.tools) && payload.tools.length > 0) {
+    return { isTopicDetection: false };
+  }
+
+  // Check system prompt for topic classification patterns
+  const systemText = typeof payload.system === 'string'
+    ? payload.system
+    : Array.isArray(payload.system)
+      ? payload.system.map(b => b.text || '').join(' ')
+      : '';
+
+  // Also check first message if system prompt is embedded there
+  let firstMsgText = '';
+  if (Array.isArray(payload.messages) && payload.messages.length > 0) {
+    const first = payload.messages[0];
+    if (first?.role === 'user' || first?.role === 'system') {
+      firstMsgText = typeof first.content === 'string'
+        ? first.content
+        : Array.isArray(first.content)
+          ? first.content.map(b => b.text || '').join(' ')
+          : '';
+    }
+  }
+
+  const combined = systemText + ' ' + firstMsgText;
+  const lc = combined.toLowerCase();
+
+  // Match patterns that Claude Code uses for topic detection
+  const topicPatterns = [
+    'new conversation topic',
+    'topic change',
+    'classify the topic',
+    'classify this message',
+    'conversation topic',
+    'topic classification',
+    'determines the topic',
+    'determine the topic',
+    'categorize the topic',
+    'what topic',
+    'identify the topic',
+  ];
+
+  const hasTopicPattern = topicPatterns.some(p => lc.includes(p));
+
+  if (hasTopicPattern) {
+    return { isTopicDetection: true };
+  }
+
+  // Additional heuristic: very short payload with no tools and system prompt
+  // mentioning "topic" or "classify"
+  if (
+    !payload.tools &&
+    Array.isArray(payload.messages) &&
+    payload.messages.length <= 3 &&
+    (lc.includes('topic') || lc.includes('classify'))
+  ) {
+    return { isTopicDetection: true };
+  }
+
+  return { isTopicDetection: false };
+}
+
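For reviewers, a quick illustration of what the two helpers above match. This is an editorial sketch with made-up payloads, not part of the change; it assumes the message shapes the functions already inspect.

    // Suggestion-mode marker in the last user message.
    detectSuggestionMode([
      { role: "assistant", content: "Done." },
      { role: "user", content: "[SUGGESTION MODE: predict the next user input]" },
    ]); // => { isSuggestionMode: true }

    // Topic-classification prompt, no tools, short payload.
    detectTopicDetection({
      system: "Determine the topic of the conversation and answer with a short label.",
      messages: [{ role: "user", content: "How do I revert a commit?" }],
    }); // => { isTopicDetection: true }

    // A tool-bearing request is never treated as topic detection.
    detectTopicDetection({
      tools: [{ name: "fs_read" }],
      messages: [{ role: "user", content: "Read package.json" }],
    }); // => { isTopicDetection: false }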
"default"; + if (isSuggestionMode && suggestionModelConfig.toLowerCase() === "none") { + console.log('[SUGGESTION MODE] Skipping LLM call (SUGGESTION_MODE_MODEL=none)'); + return { + response: { + body: { + id: `msg_suggestion_skip_${Date.now()}`, + type: "message", + role: "assistant", + content: [{ type: "text", text: "" }], + model: requestedModel, + stop_reason: "end_turn", + stop_sequence: null, + usage: { input_tokens: 0, output_tokens: 0 }, + }, + ok: true, + status: 200, + }, + steps: 0, + durationMs: 0, + terminationReason: "suggestion_mode_skip", + }; + } + // === TOOL LOOP GUARD (EARLY CHECK) === // Check BEFORE sanitization since sanitizePayload removes conversation history const toolLoopThreshold = config.policy?.toolLoopThreshold ?? 3; diff --git a/src/providers/context-window.js b/src/providers/context-window.js new file mode 100644 index 0000000..dcea89d --- /dev/null +++ b/src/providers/context-window.js @@ -0,0 +1,144 @@ +/** + * Context Window Detection + * + * Queries the active provider for its context window size (in tokens). + * Returns -1 if unknown. Caches the result for the lifetime of the process. + */ + +const config = require("../config"); +const logger = require("../logger"); + +// Known context sizes for proprietary models (tokens) +const KNOWN_CONTEXT_SIZES = { + // Anthropic + "claude-3-opus": 200000, + "claude-3-sonnet": 200000, + "claude-3-haiku": 200000, + "claude-3.5-sonnet": 200000, + "claude-4": 200000, + // OpenAI + "gpt-4o": 128000, + "gpt-4o-mini": 128000, + "gpt-4-turbo": 128000, + "gpt-4": 8192, + "gpt-3.5-turbo": 16385, +}; + +// null = not yet detected, -1 = detected but unknown, >0 = known +let cachedContextWindow = null; + +async function detectContextWindow() { + const provider = config.modelProvider.type; + + try { + if (provider === "ollama") { + return await detectOllamaContextWindow(); + } + if (provider === "openrouter") { + return await detectOpenRouterContextWindow(); + } + if (provider === "openai") { + return detectFromKnownSizes(config.openai.model); + } + // azure-anthropic, bedrock — use known Anthropic sizes + if (["azure-anthropic", "bedrock"].includes(provider)) { + return 200000; + } + if (provider === "azure-openai") { + return detectFromKnownSizes(config.azureOpenAI.deployment); + } + if (provider === "llamacpp" || provider === "lmstudio") { + return -1; // No standard API to query + } + if (provider === "zai") { + return 128000; // GLM-4 family + } + if (provider === "vertex") { + return 1000000; // Gemini models + } + } catch (err) { + logger.warn({ err, provider }, "Failed to detect context window"); + } + + return -1; +} + +async function detectOllamaContextWindow() { + const endpoint = `${config.ollama.endpoint}/api/show`; + const response = await fetch(endpoint, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ name: config.ollama.model }), + signal: AbortSignal.timeout(5000), + }); + if (!response.ok) return -1; + const data = await response.json(); + + // Ollama prefixes context_length with the architecture name + // (e.g. 
"llama.context_length", "qwen2.context_length", "gemma.context_length") + // Search for any key ending in ".context_length" or exactly "context_length" + if (data.model_info && typeof data.model_info === "object") { + for (const [key, value] of Object.entries(data.model_info)) { + if (key === "context_length" || key.endsWith(".context_length")) { + if (typeof value === "number" && value > 0) return value; + } + } + } + + // Fallback: parse from parameters string (e.g. "num_ctx 32768") + const match = data.parameters?.match(/num_ctx\s+(\d+)/); + if (match) return parseInt(match[1], 10); + return -1; +} + +async function detectOpenRouterContextWindow() { + const baseEndpoint = config.openrouter.endpoint || "https://openrouter.ai/api/v1/chat/completions"; + // Derive the models endpoint from the chat endpoint + const modelsEndpoint = baseEndpoint.replace(/\/v1\/chat\/completions$/, "/v1/models"); + const response = await fetch(modelsEndpoint, { + headers: { Authorization: `Bearer ${config.openrouter.apiKey}` }, + signal: AbortSignal.timeout(5000), + }); + if (!response.ok) return -1; + const data = await response.json(); + const model = data.data?.find((m) => m.id === config.openrouter.model); + return model?.context_length ?? -1; +} + +function detectFromKnownSizes(modelName) { + if (!modelName) return -1; + const lower = modelName.toLowerCase(); + for (const [key, size] of Object.entries(KNOWN_CONTEXT_SIZES)) { + if (lower.includes(key)) return size; + } + return -1; +} + +async function getContextWindow() { + if (cachedContextWindow !== null) return cachedContextWindow; + cachedContextWindow = await detectContextWindow(); + if (cachedContextWindow === -1) { + logger.warn( + { provider: config.modelProvider.type }, + "Could not detect context window size — falling back to 8K tokens. " + + "Compression may be more aggressive than necessary.", + ); + } else { + logger.info( + { contextWindow: cachedContextWindow, provider: config.modelProvider.type }, + "Context window detected", + ); + } + return cachedContextWindow; +} + +function resetCache() { + cachedContextWindow = null; +} + +module.exports = { + getContextWindow, + detectContextWindow, + resetCache, + KNOWN_CONTEXT_SIZES, +}; diff --git a/src/tools/index.js b/src/tools/index.js index 11227f0..95f4807 100644 --- a/src/tools/index.js +++ b/src/tools/index.js @@ -88,8 +88,43 @@ const TOOL_ALIASES = { runtests: "workspace_test_run", testsummary: "workspace_test_summary", testhistory: "workspace_test_history", + // Glob has dedicated tool in src/tools/indexer.js (registerGlobTool) + // - returns plain text format instead of JSON + // glob: "workspace_list", + // Glob: "workspace_list", }; +/** + * Recursively parse string values that look like JSON arrays/objects. + * Some providers double-serialize nested parameters (e.g. questions: "[{...}]" + * instead of questions: [{...}]), which causes schema validation failures. + */ +function deepParseStringifiedJson(obj) { + if (typeof obj !== "object" || obj === null) return obj; + if (Array.isArray(obj)) return obj.map(deepParseStringifiedJson); + + const result = {}; + for (const [key, value] of Object.entries(obj)) { + if (typeof value === "string") { + const trimmed = value.trim(); + if ( + (trimmed.startsWith("[") && trimmed.endsWith("]")) || + (trimmed.startsWith("{") && trimmed.endsWith("}")) + ) { + try { + result[key] = deepParseStringifiedJson(JSON.parse(trimmed)); + continue; + } catch { + // Not valid JSON, keep as string + } + } + } + result[key] = + typeof value === "object" ? 
diff --git a/src/tools/index.js b/src/tools/index.js
index 11227f0..95f4807 100644
--- a/src/tools/index.js
+++ b/src/tools/index.js
@@ -88,8 +88,43 @@ const TOOL_ALIASES = {
   runtests: "workspace_test_run",
   testsummary: "workspace_test_summary",
   testhistory: "workspace_test_history",
+  // Glob has dedicated tool in src/tools/indexer.js (registerGlobTool)
+  // - returns plain text format instead of JSON
+  // glob: "workspace_list",
+  // Glob: "workspace_list",
 };
 
+/**
+ * Recursively parse string values that look like JSON arrays/objects.
+ * Some providers double-serialize nested parameters (e.g. questions: "[{...}]"
+ * instead of questions: [{...}]), which causes schema validation failures.
+ */
+function deepParseStringifiedJson(obj) {
+  if (typeof obj !== "object" || obj === null) return obj;
+  if (Array.isArray(obj)) return obj.map(deepParseStringifiedJson);
+
+  const result = {};
+  for (const [key, value] of Object.entries(obj)) {
+    if (typeof value === "string") {
+      const trimmed = value.trim();
+      if (
+        (trimmed.startsWith("[") && trimmed.endsWith("]")) ||
+        (trimmed.startsWith("{") && trimmed.endsWith("}"))
+      ) {
+        try {
+          result[key] = deepParseStringifiedJson(JSON.parse(trimmed));
+          continue;
+        } catch {
+          // Not valid JSON, keep as string
+        }
+      }
+    }
+    result[key] =
+      typeof value === "object" ? deepParseStringifiedJson(value) : value;
+  }
+  return result;
+}
+
 function coerceString(value) {
   if (value === undefined || value === null) return "";
   if (typeof value === "string") return value;
@@ -124,24 +159,65 @@ function normalizeHandlerResult(result) {
   return { ok, status, content, metadata };
 }
 
-function parseArguments(call) {
+function parseArguments(call, providerType = null) {
   const raw = call?.function?.arguments;
-  if (typeof raw !== "string" || raw.trim().length === 0) return {};
+
+  // DEBUG: Log full call structure for diagnosis
+  logger.info({
+    providerType,
+    fullCall: JSON.stringify(call),
+    hasFunction: !!call?.function,
+    functionKeys: call?.function ? Object.keys(call.function) : [],
+    argumentsType: typeof raw,
+    argumentsValue: raw,
+    argumentsIsNull: raw === null,
+    argumentsIsUndefined: raw === undefined,
+  }, "=== PARSING TOOL ARGUMENTS ===");
+
+  // Ollama sends arguments as an object, OpenAI as a JSON string
+  if (typeof raw === "object" && raw !== null) {
+    if (providerType !== "ollama") {
+      logger.warn({
+        providerType,
+        expectedProvider: "ollama",
+        argumentsType: typeof raw,
+        arguments: raw
+      }, `Received object arguments but provider is ${providerType || "unknown"}, expected ollama format. Continuing with object.`);
+    } else {
+      logger.info({
+        type: "object",
+        arguments: raw
+      }, "Tool arguments already parsed (Ollama format)");
+    }
+    return deepParseStringifiedJson(raw);
+  }
+
+  if (typeof raw !== "string" || raw.trim().length === 0) {
+    logger.warn({
+      argumentsType: typeof raw,
+      argumentsEmpty: !raw || raw.trim().length === 0,
+      providerType
+    }, "Arguments not a string or empty - returning {}");
+    return {};
+  }
+
   try {
-    return JSON.parse(raw);
+    const parsed = JSON.parse(raw);
+    logger.info({ parsed }, "Parsed JSON string arguments");
+    return deepParseStringifiedJson(parsed);
   } catch (err) {
-    logger.warn({ err }, "Failed to parse tool arguments");
+    logger.warn({ err, raw }, "Failed to parse tool arguments");
     return {};
   }
 }
 
-function normaliseToolCall(call) {
+function normaliseToolCall(call, providerType = null) {
   const name = call?.function?.name ?? call?.name;
   const id = call?.id ?? `${name ?? "tool"}_${Date.now()}`;
   return {
     id,
     name,
-    arguments: parseArguments(call),
+    arguments: parseArguments(call, providerType),
     raw: call,
   };
 }
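To make the provider differences concrete, here is how the same tool call parses through the functions above in both formats. Values are illustrative only; the double-serialized questions field is exactly what deepParseStringifiedJson unwraps.

    // OpenAI-style call: arguments arrive as a JSON string.
    parseArguments(
      { function: { name: "Write", arguments: '{"path":"a.txt","content":"hi"}' } },
      "openai",
    );
    // => { path: "a.txt", content: "hi" }

    // Ollama-style call: arguments arrive as an object, sometimes with a
    // nested value that is itself a JSON string.
    parseArguments(
      { function: { name: "AskUserQuestion", arguments: { questions: '[{"question":"Proceed?"}]' } } },
      "ollama",
    );
    // => { questions: [{ question: "Proceed?" }] }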
"tool"}_${Date.now()}`; return { id, name, - arguments: parseArguments(call), + arguments: parseArguments(call, providerType), raw: call, }; } @@ -182,7 +258,8 @@ function listTools() { } async function executeToolCall(call, context = {}) { - const normalisedCall = normaliseToolCall(call); + const providerType = context?.providerType || context?.provider || null; + const normalisedCall = normaliseToolCall(call, providerType); let registered = registry.get(normalisedCall.name); if (!registered) { const aliasTarget = TOOL_ALIASES[normalisedCall.name.toLowerCase()]; @@ -225,6 +302,10 @@ async function executeToolCall(call, context = {}) { } if (!registered) { + logger.warn({ + tool: normalisedCall.name, + id: normalisedCall.id + }, "Tool not registered"); const content = coerceString({ error: "tool_not_registered", tool: normalisedCall.name, @@ -241,6 +322,17 @@ async function executeToolCall(call, context = {}) { }; } + // Log tool invocation with full details for debugging + logger.info({ + tool: normalisedCall.name, + id: normalisedCall.id, + args: normalisedCall.arguments, + argsKeys: Object.keys(normalisedCall.arguments || {}), + rawCall: JSON.stringify(normalisedCall.raw) + }, "=== EXECUTING TOOL ==="); + + startTime = Date.now() + try { const result = await registered.handler( { @@ -251,11 +343,47 @@ async function executeToolCall(call, context = {}) { }, context, ); - const formatted = normalizeHandlerResult(result); + let formatted = normalizeHandlerResult(result); + + // Auto-approve external file reads: the user already asked to read the file, + // so re-execute transparently with user_approved=true instead of relying + // on the LLM to manage a multi-step approval conversation. + if ( + formatted.content && + typeof formatted.content === "string" && + formatted.content.startsWith("[APPROVAL REQUIRED]") + ) { + logger.info( + { tool: normalisedCall.name, id: normalisedCall.id }, + "Auto-approving external file read (user initiated the request)", + ); + const approvedResult = await registered.handler( + { + id: normalisedCall.id, + name: normalisedCall.name, + args: { ...normalisedCall.arguments, user_approved: true }, + raw: normalisedCall.raw, + }, + context, + ); + formatted = normalizeHandlerResult(approvedResult); + } // Apply tool output truncation for token efficiency const truncatedContent = truncateToolOutput(normalisedCall.name, formatted.content); + const durationMs = Date.now() - startTime; + + // Log successful execution + logger.info({ + tool: normalisedCall.name, + id: normalisedCall.id, + status: formatted.status, + durationMs, + outputLength: truncatedContent?.length || 0, + truncated: truncatedContent !== formatted.content + }, "Tool execution completed"); + return { id: normalisedCall.id, name: normalisedCall.name, @@ -267,11 +395,20 @@ async function executeToolCall(call, context = {}) { registered: true, truncated: truncatedContent !== formatted.content, originalLength: formatted.content?.length, - truncatedLength: truncatedContent?.length + truncatedLength: truncatedContent?.length, + durationMs }, }; } catch (err) { - logger.error({ err, tool: normalisedCall.name }, "Tool execution failed"); + const durationMs = Date.now() - startTime; + + logger.error({ + err, + tool: normalisedCall.name, + id: normalisedCall.id, + durationMs + }, "Tool execution failed"); + return { id: normalisedCall.id, name: normalisedCall.name, @@ -286,6 +423,7 @@ async function executeToolCall(call, context = {}) { metadata: { registered: true, error: true, + durationMs }, error: err, 
}; diff --git a/src/tools/indexer.js b/src/tools/indexer.js index eb0a981..bf13ca8 100644 --- a/src/tools/indexer.js +++ b/src/tools/indexer.js @@ -16,11 +16,13 @@ function registerWorkspaceListTool() { registerTool( "workspace_list", async ({ args = {} }) => { + // Support both 'pattern' (Glob tool) and 'patterns' (workspace_list) + const rawPatterns = args.pattern ?? args.patterns; const patterns = - typeof args.patterns === "string" - ? [args.patterns] - : Array.isArray(args.patterns) - ? args.patterns + typeof rawPatterns === "string" + ? [rawPatterns] + : Array.isArray(rawPatterns) + ? rawPatterns : undefined; const ignore = typeof args.ignore === "string" @@ -53,10 +55,62 @@ function registerWorkspaceListTool() { ); } +/** + * Search recent conversation context for content matching a query. + * + * Scans the last 10 messages for tool_result content that matches + * the query words. Returns matches sorted by relevance. + * + * @param {string} query - Search query + * @param {Array} messages - Recent conversation messages + * @returns {Array} Matching context snippets + */ +function searchRecentContext(query, messages) { + if (!query || !messages || !Array.isArray(messages)) return []; + + const queryLower = query.toLowerCase(); + const queryWords = queryLower.split(/\s+/).filter((w) => w.length > 2); + if (queryWords.length === 0) return []; + + const matches = []; + + // Scan last 10 messages for tool_result content + const recent = messages.slice(-10); + for (const msg of recent) { + if (msg.role !== "tool" && msg.role !== "user") continue; + + const content = + typeof msg.content === "string" + ? msg.content + : Array.isArray(msg.content) + ? msg.content + .filter((b) => b.type === "tool_result" || b.type === "text") + .map((b) => b.content ?? b.text ?? "") + .join("\n") + : ""; + + if (!content || content.length < 20) continue; + + // Check if any query words appear in the content + const contentLower = content.toLowerCase(); + const matchCount = queryWords.filter((w) => contentLower.includes(w)).length; + + if (matchCount > 0 && matchCount / queryWords.length >= 0.3) { + matches.push({ + source: "conversation_context", + relevance: matchCount / queryWords.length, + preview: content.substring(0, 500), + }); + } + } + + return matches.sort((a, b) => b.relevance - a.relevance).slice(0, 3); +} + function registerWorkspaceSearchTool() { registerTool( "workspace_search", - async ({ args = {} }) => { + async ({ args = {} }, context = {}) => { const query = args.query ?? args.term ?? args.pattern; const regex = args.regex === true || args.is_regex === true; const limit = Number.isInteger(args.limit) ? args.limit : undefined; @@ -67,6 +121,9 @@ function registerWorkspaceSearchTool() { ? args.ignore : undefined; + // Check recent conversation context for matching content + const contextMatches = searchRecentContext(query, context.requestMessages); + const result = await searchWorkspace({ query, regex, @@ -74,12 +131,21 @@ function registerWorkspaceSearchTool() { ignore, }); + // Prepend context matches if found + if (contextMatches.length > 0) { + result.context_matches = contextMatches; + result.note = + "Results from recently read files are listed in context_matches. 
" + + "Prefer these over workspace matches when answering about previously read content."; + } + return { ok: true, status: 200, content: JSON.stringify(result, null, 2), metadata: { total: result.matches.length, + contextTotal: contextMatches.length, engine: result.engine, }, }; @@ -260,6 +326,45 @@ function registerSymbolReferencesTool() { ); } + +/** + * Dedicated Glob tool for Claude Code compatibility (maybe others?). + * + * Why this exists (instead of using workspace_list alias): + * - Claude Code's Glob tool returns plain text (one path per line) + * - workspace_list returns JSON with entries array + * - Models expect plain text format from Glob tool + * + * See also: TOOL_ALIASES in src/tools/index.js (commented glob entries) + */ +function registerGlobTool() { + registerTool( + "Glob", + async ({ args = {} }) => { + const pattern = args.pattern; + const basePath = args.path; + + let patterns; + if (basePath) { + const cleanPath = basePath.replace(/\/+$/, ""); + patterns = pattern ? [`${cleanPath}/${pattern}`] : [`${cleanPath}/**/*`]; + } else { + patterns = pattern ? [pattern] : undefined; + } + + const entries = await listWorkspaceFiles({ patterns, limit: 1000 }); + + // Plain text output: one path per line (Claude Code format) + return { + ok: true, + status: 200, + content: entries.map((e) => e.path).join("\n"), + }; + }, + { category: "indexing" }, + ); +} + function registerGotoDefinitionTool() { registerTool( "workspace_goto_definition", @@ -353,6 +458,7 @@ function registerIndexerTools() { registerSymbolSearchTool(); registerSymbolReferencesTool(); registerGotoDefinitionTool(); + registerGlobTool(); } module.exports = { diff --git a/src/tools/stubs.js b/src/tools/stubs.js index c026e8e..d2f1bd3 100644 --- a/src/tools/stubs.js +++ b/src/tools/stubs.js @@ -41,12 +41,41 @@ function createStubHandler(name, description) { }); } +function askUserQuestionHandler({ args }) { + let questions = args?.questions ?? []; + + if (typeof questions === "string") { + try { questions = JSON.parse(questions); } catch { questions = []; } + } + + if (!Array.isArray(questions)) questions = [questions]; + const lines = questions.map((q, i) => { + const header = q.header ? `[${q.header}] ` : ""; + const opts = (q.options ?? []) + .map((o, j) => ` ${j + 1}. ${o.label} — ${o.description}`) + .join("\n"); + return `${header}${q.question}\n${opts}`; + }); + + return { + ok: true, + status: 200, + content: lines.join("\n\n"), + }; +} + function registerStubTools() { STUB_TOOLS.forEach((tool) => { if (!hasTool(tool.name)) { registerTool(tool.name, createStubHandler(tool.name, tool.description), tool); } }); + + if (!hasTool("AskUserQuestion")) { + registerTool("AskUserQuestion", askUserQuestionHandler, { + description: "Returns the model's question to the user as assistant output.", + }); + } } module.exports = { diff --git a/src/tools/workspace.js b/src/tools/workspace.js index 144c6c1..37933ae 100644 --- a/src/tools/workspace.js +++ b/src/tools/workspace.js @@ -1,8 +1,12 @@ +const path = require("path"); const { readFile, writeFile, applyFilePatch, resolveWorkspacePath, + expandTilde, + isExternalPath, + readExternalFile, fileExists, workspaceRoot, } = require("../workspace"); @@ -30,17 +34,40 @@ function registerWorkspaceTools() { registerTool( "fs_read", async ({ args = {} }) => { - const relativePath = validateString(args.path ?? args.file, "path"); + const targetPath = validateString(args.path ?? args.file ?? 
args.file_path, "path");
       const encoding = normalizeEncoding(args.encoding);
-      const content = await readFile(relativePath, encoding);
+
+      // Check if path is outside workspace
+      if (isExternalPath(targetPath)) {
+        if (args.user_approved !== true) {
+          const expanded = expandTilde(targetPath);
+          const resolved = path.resolve(expanded);
+          return {
+            ok: true,
+            status: 200,
+            content: `[APPROVAL REQUIRED] The file "${resolved}" is outside the workspace and cannot be read without user permission.\n\nYou must now ask the user: "The file ${resolved} is outside the workspace. May I read it?"\n\nIf the user says yes, call the Read tool again with file_path="${targetPath}" and user_approved=true.`,
+          };
+        }
+        // User approved — read external file
+        const { content, resolvedPath } = await readExternalFile(targetPath, encoding);
+        return {
+          ok: true,
+          status: 200,
+          content,
+          metadata: { path: targetPath, encoding, resolved_path: resolvedPath },
+        };
+      }
+
+      // Normal workspace read (unchanged)
+      const content = await readFile(targetPath, encoding);
       return {
         ok: true,
         status: 200,
         content,
         metadata: {
-          path: relativePath,
+          path: targetPath,
           encoding,
-          resolved_path: resolveWorkspacePath(relativePath),
+          resolved_path: resolveWorkspacePath(targetPath),
         },
       };
     },
@@ -114,7 +141,7 @@ function registerWorkspaceTools() {
   registerTool(
     "edit_patch",
     async ({ args = {} }, context = {}) => {
-      const relativePath = validateString(args.path ?? args.file, "path");
+      const relativePath = validateString(args.path ?? args.file ?? args.file_path, "path");
       const patch = validateString(args.patch, "patch");
       const encoding = normalizeEncoding(args.encoding);
 
diff --git a/src/workspace/index.js b/src/workspace/index.js
index da1a7e0..6cc058a 100644
--- a/src/workspace/index.js
+++ b/src/workspace/index.js
@@ -10,6 +10,33 @@ if (!fs.existsSync(workspaceRoot)) {
   fs.mkdirSync(workspaceRoot, { recursive: true });
 }
 
+function expandTilde(targetPath) {
+  if (typeof targetPath !== "string") return targetPath;
+  if (targetPath.startsWith("~")) {
+    const home = process.env.HOME || process.env.USERPROFILE;
+    if (home) {
+      return path.join(home, targetPath.slice(1));
+    }
+  }
+  return targetPath;
+}
+
+function isExternalPath(targetPath) {
+  const expanded = expandTilde(targetPath);
+  const relative = path.relative(workspaceRoot, path.resolve(workspaceRoot, expanded));
+  return relative.startsWith("..") || path.isAbsolute(relative);
+}
+
+async function readExternalFile(targetPath, encoding = "utf8") {
+  const expanded = expandTilde(targetPath);
+  const resolved = path.resolve(expanded);
+  const stats = await fsp.stat(resolved);
+  if (!stats.isFile()) {
+    throw new Error("Requested path is not a file.");
+  }
+  return { content: await fsp.readFile(resolved, { encoding }), resolvedPath: resolved };
+}
+
 function resolveWorkspacePath(targetPath) {
   if (!targetPath || typeof targetPath !== "string") {
     throw new Error("Path must be a non-empty string.");
@@ -110,6 +137,9 @@ function validateCwd(cwd) {
 module.exports = {
   workspaceRoot,
   resolveWorkspacePath,
+  expandTilde,
+  isExternalPath,
+  readExternalFile,
   readFile,
   writeFile,
   fileExists,