diff --git a/.env.example b/.env.example index a462e81..b85da41 100644 --- a/.env.example +++ b/.env.example @@ -212,6 +212,14 @@ WORKSPACE_INDEX_ENABLED=true # - client/passthrough: Return tool calls to CLI for local execution TOOL_EXECUTION_MODE=server +# Suggestion mode model override +# Controls which model handles suggestion mode (predicting next user input). +# Values: +# default - Use the same model as MODEL_PROVIDER (no change) +# none - Skip suggestion mode LLM calls entirely (saves GPU time) +# - Use a specific model (e.g. "llama3.1" for a lighter model) +SUGGESTION_MODE_MODEL=default + # Enable/disable automatic tool injection for local models INJECT_TOOLS_LLAMACPP=true INJECT_TOOLS_OLLAMA=true diff --git a/src/api/middleware/logging.js b/src/api/middleware/logging.js index e53faee..dc72b03 100644 --- a/src/api/middleware/logging.js +++ b/src/api/middleware/logging.js @@ -12,26 +12,92 @@ function maskHeaders(headers = {}) { return clone; } -const loggingMiddleware = pinoHttp({ +const baseLoggingMiddleware = pinoHttp({ logger, - customProps: (req) => ({ + autoLogging: false, // Disable automatic logging so we can log manually with bodies + customProps: (req, res) => ({ sessionId: req.sessionId ?? null, }), - customLogLevel: (req, res, err) => { - if (err || res.statusCode >= 500) return "error"; - if (res.statusCode >= 400) return "warn"; - return "info"; - }, - wrapSerializers: true, - serializers: { - req(req) { - return { +}); + +// Wrapper middleware to capture and log full request/response bodies +function loggingMiddleware(req, res, next) { + const startTime = Date.now(); + + // Log request with full body immediately + logger.info({ + sessionId: req.sessionId ?? null, + req: { + method: req.method, + url: req.url, + headers: maskHeaders(req.headers), + }, + requestBody: req.body, // Full request body without truncation + }, 'request started'); + + // Intercept res.write for streaming responses + const originalWrite = res.write; + const chunks = []; + res.write = function (chunk) { + if (chunk) { + chunks.push(Buffer.from(chunk)); + } + return originalWrite.apply(this, arguments); + }; + + // Intercept res.send to capture the body + const originalSend = res.send; + res.send = function (body) { + res._capturedBody = body; + + // Parse if it's a JSON string for better logging + if (typeof body === 'string') { + try { + res._capturedBody = JSON.parse(body); + } catch (e) { + res._capturedBody = body; + } + } + + return originalSend.call(this, body); + }; + + // Log response when finished + res.on('finish', () => { + const responseTime = Date.now() - startTime; + + // Capture streaming body if not already captured via send() + if (chunks.length > 0 && !res._capturedBody) { + const fullBody = Buffer.concat(chunks).toString('utf8'); + res._capturedBody = { + type: 'stream', + contentType: res.getHeader('content-type'), + size: fullBody.length, + preview: fullBody.substring(0, 1000) + }; + } + + const logLevel = res.statusCode >= 500 ? 'error' : res.statusCode >= 400 ? 'warn' : 'info'; + + logger[logLevel]({ + sessionId: req.sessionId ?? null, + req: { method: req.method, url: req.url, headers: maskHeaders(req.headers), - }; - }, - }, -}); + }, + res: { + statusCode: res.statusCode, + headers: res.getHeaders ? 
res.getHeaders() : res.headers, + }, + requestBody: req.body, // Full request body without truncation + responseBody: res._capturedBody, // Full response body without truncation + responseTime, + }, 'request completed'); + }); + + // Still call base middleware to set up req.log + baseLoggingMiddleware(req, res, next); +} module.exports = loggingMiddleware; diff --git a/src/api/middleware/request-logging.js b/src/api/middleware/request-logging.js index 8352e1a..cf2709e 100644 --- a/src/api/middleware/request-logging.js +++ b/src/api/middleware/request-logging.js @@ -25,13 +25,14 @@ function requestLoggingMiddleware(req, res, next) { // Add to response headers res.setHeader("X-Request-ID", requestId); - // Log request start +// Log request start with full body logger.info( { requestId, method: req.method, path: req.path || req.url, query: req.query, + body: req.body, // Full request body without truncation ip: req.ip || req.socket.remoteAddress, userAgent: req.headers["user-agent"], }, @@ -43,7 +44,18 @@ function requestLoggingMiddleware(req, res, next) { res.send = function (body) { const duration = Date.now() - startTime; - // Log request completion + // Parse body if it's a string + let responseBody = body; + if (typeof body === 'string') { + try { + responseBody = JSON.parse(body); + } catch (e) { + // Keep as string if not JSON + responseBody = body; + } + } + + // Log request completion with full request and response bodies logger.info( { requestId, @@ -52,6 +64,8 @@ function requestLoggingMiddleware(req, res, next) { status: res.statusCode, duration, contentLength: res.getHeader("content-length"), + requestBody: req.body, // Full request body for reference + responseBody, // Full response body without truncation }, "Request completed" ); diff --git a/src/api/router.js b/src/api/router.js index b3ed198..057341d 100644 --- a/src/api/router.js +++ b/src/api/router.js @@ -7,6 +7,7 @@ const openaiRouter = require("./openai-router"); const providersRouter = require("./providers-handler"); const { getRoutingHeaders, getRoutingStats, analyzeComplexity } = require("../routing"); const { validateCwd } = require("../workspace"); +const logger = require("../logger"); const router = express.Router(); @@ -121,6 +122,13 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { const wantsStream = Boolean(req.query?.stream === 'true' || req.body?.stream); const hasTools = Array.isArray(req.body?.tools) && req.body.tools.length > 0; + logger.info({ + sessionId: req.headers['x-claude-session-id'], + wantsStream, + hasTools, + willUseStreamingPath: wantsStream || hasTools + }, "=== REQUEST ROUTING DECISION ==="); + // Analyze complexity for routing headers (Phase 3) const complexity = analyzeComplexity(req.body); const routingHeaders = getRoutingHeaders({ @@ -338,6 +346,13 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { // Legacy streaming wrapper (for tool-based requests that requested streaming) if (wantsStream && hasTools) { + logger.info({ + sessionId: req.headers['x-claude-session-id'], + pathType: 'legacy_streaming_wrapper', + wantsStream, + hasTools + }, "=== USING LEGACY STREAMING WRAPPER (TOOL-BASED WITH STREAMING) ==="); + metrics.recordStreamingStart(); res.set({ "Content-Type": "text/event-stream", @@ -359,6 +374,13 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { // Use proper Anthropic SSE format const msg = result.body; + logger.info({ + sessionId: req.headers['x-claude-session-id'], + eventType: 'message_start', + 
streamingWithTools: true, + hasContent: !!(msg.content && msg.content.length > 0) + }, "=== SENDING SSE MESSAGE_START ==="); + // 1. message_start res.write(`event: message_start\n`); res.write(`data: ${JSON.stringify({ @@ -419,9 +441,52 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { res.write(`event: content_block_stop\n`); res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`); + } else if (block.type === "tool_result") { + // === TOOL_RESULT SSE STREAMING - ENTERED === + logger.info({ + blockIndex: i, + blockType: block.type, + toolUseId: block.tool_use_id, + contentType: typeof block.content, + contentLength: typeof block.content === 'string' ? block.content.length : JSON.stringify(block.content).length + }, "=== SSE: STREAMING TOOL_RESULT BLOCK - START ==="); + + // Stream tool_result blocks so CLI can display actual tool output + res.write(`event: content_block_start\n`); + res.write(`data: ${JSON.stringify({ + type: "content_block_start", + index: i, + content_block: { type: "tool_result", tool_use_id: block.tool_use_id, content: "" } + })}\n\n`); + + // Stream the actual content + const content = typeof block.content === 'string' + ? block.content + : JSON.stringify(block.content); + + logger.info({ + blockIndex: i, + contentLength: content.length, + contentPreview: content.substring(0, 200) + }, "=== SSE: STREAMING TOOL_RESULT CONTENT ==="); + + res.write(`event: content_block_delta\n`); + res.write(`data: ${JSON.stringify({ + type: "content_block_delta", + index: i, + delta: { type: "tool_result_delta", content: content } + })}\n\n`); + + res.write(`event: content_block_stop\n`); + res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`); + + // === TOOL_RESULT SSE STREAMING - COMPLETED === + logger.info({ + blockIndex: i, + toolUseId: block.tool_use_id + }, "=== SSE: STREAMING TOOL_RESULT BLOCK - END ==="); } } - // 3. message_delta with stop_reason res.write(`event: message_delta\n`); res.write(`data: ${JSON.stringify({ @@ -454,6 +519,16 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { }); } + + // DIAGNOSTIC: Log response being sent to client + logger.info({ + status: result.status, + hasBody: !!result.body, + bodyKeys: result.body ? Object.keys(result.body) : [], + bodyType: typeof result.body, + contentLength: result.body ? 
JSON.stringify(result.body).length : 0 + }, "=== SENDING RESPONSE TO CLIENT ==="); + metrics.recordResponse(result.status); res.status(result.status).send(result.body); } catch (error) { diff --git a/src/clients/databricks.js b/src/clients/databricks.js index d2d0d03..09fc176 100644 --- a/src/clients/databricks.js +++ b/src/clients/databricks.js @@ -181,7 +181,7 @@ async function invokeDatabricks(body) { const databricksBody = { ...body }; // Inject standard tools if client didn't send any (passthrough mode) - if (!Array.isArray(databricksBody.tools) || databricksBody.tools.length === 0) { + if (!body._noToolInjection && (!Array.isArray(databricksBody.tools) || databricksBody.tools.length === 0)) { databricksBody.tools = STANDARD_TOOLS; logger.info({ injectedToolCount: STANDARD_TOOLS.length, @@ -222,7 +222,7 @@ async function invokeAzureAnthropic(body) { } // Inject standard tools if client didn't send any (passthrough mode) - if (!Array.isArray(body.tools) || body.tools.length === 0) { + if (!body._noToolInjection && (!Array.isArray(body.tools) || body.tools.length === 0)) { body.tools = STANDARD_TOOLS; logger.info({ injectedToolCount: STANDARD_TOOLS.length, @@ -309,7 +309,7 @@ async function invokeOllama(body) { } const ollamaBody = { - model: config.ollama.model, + model: body._suggestionModeModel || config.ollama.model, messages: deduplicated, stream: false, // Force non-streaming for Ollama - streaming format conversion not yet implemented options: { @@ -331,7 +331,7 @@ async function invokeOllama(body) { if (!supportsTools) { // Model doesn't support tools - don't inject them toolsToSend = null; - } else if (injectToolsOllama && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { + } else if (injectToolsOllama && !body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { // Model supports tools and none provided - inject them toolsToSend = STANDARD_TOOLS; toolsInjected = true; @@ -399,7 +399,7 @@ async function invokeOpenRouter(body) { } const openRouterBody = { - model: config.openrouter.model, + model: body._suggestionModeModel || config.openrouter.model, messages, temperature: body.temperature ?? 0.7, max_tokens: body.max_tokens ?? 4096, @@ -411,7 +411,7 @@ async function invokeOpenRouter(body) { let toolsToSend = body.tools; let toolsInjected = false; - if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) { + if (!body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { // Client didn't send tools (likely passthrough mode) - inject standard Claude Code tools toolsToSend = STANDARD_TOOLS; toolsInjected = true; @@ -485,14 +485,14 @@ async function invokeAzureOpenAI(body) { max_tokens: Math.min(body.max_tokens ?? 4096, 16384), // Cap at Azure OpenAI's limit top_p: body.top_p ?? 
1.0, stream: false, // Force non-streaming for Azure OpenAI - streaming format conversion not yet implemented - model: config.azureOpenAI.deployment + model: body._suggestionModeModel || config.azureOpenAI.deployment }; // Add tools - inject standard tools if client didn't send any (passthrough mode) let toolsToSend = body.tools; let toolsInjected = false; - if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) { + if (!body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { // Client didn't send tools (likely passthrough mode) - inject standard Claude Code tools toolsToSend = STANDARD_TOOLS; toolsInjected = true; @@ -831,7 +831,7 @@ async function invokeOpenAI(body) { } const openAIBody = { - model: config.openai.model || "gpt-4o", + model: body._suggestionModeModel || config.openai.model || "gpt-4o", messages, temperature: body.temperature ?? 0.7, max_tokens: body.max_tokens ?? 4096, @@ -843,7 +843,7 @@ async function invokeOpenAI(body) { let toolsToSend = body.tools; let toolsInjected = false; - if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) { + if (!body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { // Client didn't send tools (likely passthrough mode) - inject standard Claude Code tools toolsToSend = STANDARD_TOOLS; toolsInjected = true; @@ -945,7 +945,7 @@ async function invokeLlamaCpp(body) { let toolsInjected = false; const injectToolsLlamacpp = process.env.INJECT_TOOLS_LLAMACPP !== "false"; - if (injectToolsLlamacpp && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { + if (injectToolsLlamacpp && !body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { toolsToSend = STANDARD_TOOLS; toolsInjected = true; logger.info({ @@ -1028,7 +1028,7 @@ async function invokeLMStudio(body) { let toolsToSend = body.tools; let toolsInjected = false; - if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) { + if (!body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { toolsToSend = STANDARD_TOOLS; toolsInjected = true; logger.info({ @@ -1075,7 +1075,7 @@ async function invokeBedrock(body) { let toolsToSend = body.tools; let toolsInjected = false; - if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) { + if (!body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { toolsToSend = STANDARD_TOOLS; toolsInjected = true; logger.info({ @@ -1359,7 +1359,7 @@ async function invokeZai(body) { zaiBody.model = mappedModel; // Inject standard tools if client didn't send any (passthrough mode) - if (!Array.isArray(zaiBody.tools) || zaiBody.tools.length === 0) { + if (!body._noToolInjection && (!Array.isArray(zaiBody.tools) || zaiBody.tools.length === 0)) { zaiBody.tools = STANDARD_TOOLS; logger.info({ injectedToolCount: STANDARD_TOOLS.length, diff --git a/src/clients/ollama-utils.js b/src/clients/ollama-utils.js index 7582f05..2cd95c9 100644 --- a/src/clients/ollama-utils.js +++ b/src/clients/ollama-utils.js @@ -93,6 +93,65 @@ function convertAnthropicToolsToOllama(anthropicTools) { })); } +/** + * Extract tool call from text when LLM outputs JSON instead of using tool_calls + * Handles formats like: {"name": "Read", "parameters": {...}} + * + * @param {string} text - Text content that may contain JSON tool call + * @returns {object|null} - Tool call object in Ollama format, or null if not found + */ +function extractToolCallFromText(text) { + if (!text || typeof text !== 'string') return null; + + // Find 
potential JSON start - look for {"name" pattern
+  const startMatch = text.match(/\{\s*"name"\s*:/);
+  if (!startMatch) return null;
+
+  const startIdx = startMatch.index;
+
+  // Find matching closing brace using brace counting
+  let braceCount = 0;
+  let endIdx = -1;
+  for (let i = startIdx; i < text.length; i++) {
+    if (text[i] === '{') braceCount++;
+    else if (text[i] === '}') {
+      braceCount--;
+      if (braceCount === 0) {
+        endIdx = i + 1;
+        break;
+      }
+    }
+  }
+
+  if (endIdx === -1) return null;
+
+  const jsonStr = text.substring(startIdx, endIdx);
+
+  try {
+    const parsed = JSON.parse(jsonStr);
+
+    if (!parsed.name || !parsed.parameters) {
+      return null;
+    }
+
+    logger.info({
+      toolName: parsed.name,
+      params: parsed.parameters,
+      originalText: text.substring(0, 200)
+    }, "Extracted tool call from text content (fallback parsing)");
+
+    return {
+      function: {
+        name: parsed.name,
+        arguments: parsed.parameters
+      }
+    };
+  } catch (e) {
+    logger.debug({ error: e.message, text: text.substring(0, 200) }, "Failed to parse extracted tool call");
+    return null;
+  }
+}
+
 /**
  * Convert Ollama tool call response to Anthropic format
  *
@@ -126,6 +185,15 @@ function convertOllamaToolCallsToAnthropic(ollamaResponse) {
-  const toolCalls = message.tool_calls || [];
+  let toolCalls = message.tool_calls || [];
   const textContent = message.content || "";

+  // FALLBACK: If no tool_calls but text contains JSON tool call, parse it
+  if (toolCalls.length === 0 && textContent) {
+    const extracted = extractToolCallFromText(textContent);
+    if (extracted) {
+      logger.info({ extractedTool: extracted.function?.name }, "Using fallback text parsing for tool call");
+      toolCalls = [extracted];
+    }
+  }
+
   const contentBlocks = [];

   // Add text content if present
@@ -217,4 +285,5 @@ module.exports = {
   convertOllamaToolCallsToAnthropic,
   buildAnthropicResponseFromOllama,
   modelNameSupportsTools,
+  extractToolCallFromText,
 };
diff --git a/src/clients/retry.js b/src/clients/retry.js
index 2178206..5d90654 100644
--- a/src/clients/retry.js
+++ b/src/clients/retry.js
@@ -10,7 +10,7 @@ const DEFAULT_CONFIG = {
   backoffMultiplier: 2,
   jitterFactor: 0.1, // 10% jitter
   retryableStatuses: [429, 500, 502, 503, 504],
-  retryableErrors: ['ECONNRESET', 'ETIMEDOUT', 'ENOTFOUND', 'ENETUNREACH'],
+  retryableErrors: ['ECONNRESET', 'ETIMEDOUT', 'ENOTFOUND', 'ENETUNREACH', 'ECONNREFUSED'],
 };

 /**
@@ -44,6 +44,11 @@ function isRetryable(error, response, config) {
     return true;
   }

+  // Check nested cause (Node undici wraps connection errors as TypeError)
+  if (error && error.cause?.code && config.retryableErrors.includes(error.cause.code)) {
+    return true;
+  }
+
   // Check for network errors
   if (error && (error.name === 'FetchError' || error.name === 'AbortError')) {
     return true;
diff --git a/src/clients/standard-tools.js b/src/clients/standard-tools.js
index 51e4163..6cfd833 100644
--- a/src/clients/standard-tools.js
+++ b/src/clients/standard-tools.js
@@ -24,13 +24,17 @@ const STANDARD_TOOLS = [
   },
   {
     name: "Read",
-    description: "Reads a file from the local filesystem. You can access any file directly by using this tool.",
+    description: "Reads a file from the local filesystem. You can access any file directly by using this tool.\n\nEXTERNAL FILE APPROVAL FLOW: When reading a file outside the workspace, the tool will return an [APPROVAL REQUIRED] message instead of the file content. When this happens you MUST: (1) Tell the user the file is outside the workspace and ask for permission. (2) If the user approves, call this tool again with the SAME file_path and set user_approved=true. 
(3) Only then will the file content be returned.", input_schema: { type: "object", properties: { file_path: { type: "string", - description: "Relative path within workspace (e.g., 'config.js', 'src/index.ts'). DO NOT use absolute paths." + description: "Path to the file. Use relative paths for workspace files (e.g., 'src/index.ts'). For files outside the workspace use absolute paths or ~ for the home directory (e.g., '~/Documents/notes.md', '/etc/hosts'). Each call reads ONE file only — do not pass multiple paths." + }, + user_approved: { + type: "boolean", + description: "Set to true ONLY after the user has explicitly approved reading a file outside the workspace. Never set this to true without asking the user first." }, limit: { type: "number", diff --git a/src/config/index.js b/src/config/index.js index 51cc548..9da383a 100644 --- a/src/config/index.js +++ b/src/config/index.js @@ -1,7 +1,9 @@ const path = require("path"); const dotenv = require("dotenv"); -dotenv.config(); +// .env must be authoritative over shell env vars (e.g. stale exports in .bashrc). +// Skip override in test mode so tests can set process.env before requiring config. +dotenv.config({ override: process.env.NODE_ENV !== "test" }); function trimTrailingSlash(value) { if (typeof value !== "string") return value; @@ -134,6 +136,14 @@ const zaiModel = process.env.ZAI_MODEL?.trim() || "GLM-4.7"; const vertexApiKey = process.env.VERTEX_API_KEY?.trim() || process.env.GOOGLE_API_KEY?.trim() || null; const vertexModel = process.env.VERTEX_MODEL?.trim() || "gemini-2.0-flash"; +// Suggestion mode model override +// Values: "default" (use MODEL_DEFAULT), "none" (skip LLM call), or a model name +const suggestionModeModel = (process.env.SUGGESTION_MODE_MODEL ?? "default").trim(); + +// Topic detection model override +// Values: "default" (use main model) or a model name to redirect topic detection to a lighter model +const topicDetectionModel = (process.env.TOPIC_DETECTION_MODEL ?? "default").trim(); + // Hot reload configuration const hotReloadEnabled = process.env.HOT_RELOAD_ENABLED !== "false"; // default true const hotReloadDebounceMs = Number.parseInt(process.env.HOT_RELOAD_DEBOUNCE_MS ?? "1000", 10); @@ -170,6 +180,13 @@ if (!["server", "client", "passthrough"].includes(toolExecutionMode)) { "TOOL_EXECUTION_MODE must be one of: server, client, passthrough (default: server)" ); } +console.log(`[CONFIG] Tool execution mode: ${toolExecutionMode}`); +if (suggestionModeModel.toLowerCase() !== "default") { + console.log(`[CONFIG] Suggestion mode model: ${suggestionModeModel}`); +} +if (topicDetectionModel.toLowerCase() !== "default") { + console.log(`[CONFIG] Topic detection model: ${topicDetectionModel}`); +} // Memory system configuration (Titans-inspired long-term memory) const memoryEnabled = process.env.MEMORY_ENABLED !== "false"; // default true @@ -342,6 +359,8 @@ const databricksUrl = ? `${rawBaseUrl}${endpointPath.startsWith("/") ? "" : "/"}${endpointPath}` : null; +// Set MODEL_DEFAULT env var to use a specific model (e.g. "llama3.1" for Ollama). +// Without it, the default falls back to a Databricks Claude model regardless of MODEL_PROVIDER. const defaultModel = process.env.MODEL_DEFAULT ?? (modelProvider === "azure-anthropic" ? 
"claude-opus-4-5" : "databricks-claude-sonnet-4-5"); @@ -592,6 +611,8 @@ var config = { modelProvider: { type: modelProvider, defaultModel, + suggestionModeModel, + topicDetectionModel, // Hybrid routing settings preferOllama, fallbackEnabled, @@ -881,6 +902,8 @@ function reloadConfig() { config.modelProvider.preferOllama = process.env.PREFER_OLLAMA === "true"; config.modelProvider.fallbackEnabled = process.env.FALLBACK_ENABLED !== "false"; config.modelProvider.fallbackProvider = (process.env.FALLBACK_PROVIDER ?? "databricks").toLowerCase(); + config.modelProvider.suggestionModeModel = (process.env.SUGGESTION_MODE_MODEL ?? "default").trim(); + config.modelProvider.topicDetectionModel = (process.env.TOPIC_DETECTION_MODEL ?? "default").trim(); // Log level config.logger.level = process.env.LOG_LEVEL ?? "info"; diff --git a/src/context/compression.js b/src/context/compression.js index 518aaba..47b0413 100644 --- a/src/context/compression.js +++ b/src/context/compression.js @@ -2,24 +2,63 @@ * History Compression for Token Optimization * * Compresses conversation history to reduce token usage while - * maintaining context quality. Uses sliding window approach: - * - Keep recent turns verbatim - * - Summarize older turns - * - Compress tool results + * maintaining context quality. Uses sliding window approach with + * percentage-based tiered compression that scales with recency + * and the model's context window size. * + * Tiers: + * - veryRecent (last 4 messages): keep 90% of content + * - recent (messages 5-10): keep 50% of content + * - old (11+): keep 20% of content */ const logger = require('../logger'); const config = require('../config'); +// Compression tiers: ratio = percentage of content to keep, minFloor = minimum chars +const COMPRESSION_TIERS = { + veryRecent: { ratio: 0.9, minFloor: 500 }, + recent: { ratio: 0.5, minFloor: 300 }, + old: { ratio: 0.2, minFloor: 200 }, +}; + +// How many of the recent messages count as "very recent" +const VERY_RECENT_COUNT = 4; + /** - * Compress conversation history to fit within token budget + * Compute the maximum character cap for a tier based on context window size. + * + * @param {number} contextWindowTokens - Model's context window in tokens (-1 = unknown) + * @param {string} tierName - "veryRecent", "recent", or "old" + * @returns {number} Maximum characters for tool result content in this tier + */ +function computeMaxCap(contextWindowTokens, tierName) { + // Convert tokens to chars (~4 chars/token), default to 8K tokens if unknown + const contextChars = (contextWindowTokens === -1 ? 8000 : contextWindowTokens) * 4; + const budgetRatios = { + veryRecent: 0.25, + recent: 0.10, + old: 0.03, + }; + return Math.floor(contextChars * (budgetRatios[tierName] ?? 0.03)); +} + +/** + * Compute the character limit for a piece of content based on tier and context window. * - * Strategy: - * 1. Keep last N turns verbatim (fresh context) - * 2. Summarize older turns (compressed history) - * 3. Compress tool results to key information only - * 4. 
Remove redundant exchanges + * @param {string} text - The text content + * @param {string} tierName - Tier name + * @param {number} contextWindowTokens - Context window in tokens + * @returns {number} Character limit + */ +function computeLimit(text, tierName, contextWindowTokens) { + const tier = COMPRESSION_TIERS[tierName] || COMPRESSION_TIERS.old; + const maxCap = computeMaxCap(contextWindowTokens, tierName); + return Math.min(maxCap, Math.max(tier.minFloor, Math.floor(text.length * tier.ratio))); +} + +/** + * Compress conversation history to fit within token budget * * @param {Array} messages - Conversation history * @param {Object} options - Compression options @@ -28,6 +67,8 @@ const config = require('../config'); function compressHistory(messages, options = {}) { if (!messages || messages.length === 0) return messages; + const contextWindowTokens = options.contextWindowTokens ?? -1; + const opts = { keepRecentTurns: options.keepRecentTurns ?? config.historyCompression?.keepRecentTurns ?? 10, summarizeOlder: options.summarizeOlder ?? config.historyCompression?.summarizeOlder ?? true, @@ -58,12 +99,16 @@ function compressHistory(messages, options = {}) { compressed.push(summary); } } else { - // Just compress tool results in old messages - compressed = oldMessages.map(msg => compressMessage(msg)); + // Compress tool results in old messages using "old" tier + compressed = oldMessages.map(msg => compressMessage(msg, "old", contextWindowTokens)); } - // Add recent messages (may compress tool results but keep content) - const recentCompressed = recentMessages.map(msg => compressToolResults(msg)); + // Add recent messages with tiered compression + const recentCompressed = recentMessages.map((msg, i) => { + const isVeryRecent = i >= recentMessages.length - VERY_RECENT_COUNT; + const tierName = isVeryRecent ? "veryRecent" : "recent"; + return compressToolResults(msg, tierName, contextWindowTokens); + }); const finalMessages = [...compressed, ...recentCompressed]; @@ -82,7 +127,8 @@ function compressHistory(messages, options = {}) { percentage: ((saved / originalLength) * 100).toFixed(1), splitIndex, oldMessages: oldMessages.length, - recentMessages: recentMessages.length + recentMessages: recentMessages.length, + contextWindowTokens, }, 'History compression applied'); } @@ -149,26 +195,28 @@ function summarizeOldHistory(messages) { } /** - * Compress a single message - * - * Reduces message size while preserving essential information. 
+ * Compress a single message (used for old messages outside the recent window) * * @param {Object} message - Message to compress + * @param {string} tierName - Compression tier + * @param {number} contextWindowTokens - Context window in tokens * @returns {Object} Compressed message */ -function compressMessage(message) { +function compressMessage(message, tierName = "old", contextWindowTokens = -1) { if (!message) return message; + const limit = computeLimit("x".repeat(300), tierName, contextWindowTokens); + const compressed = { role: message.role }; // Compress content based on type if (typeof message.content === 'string') { - compressed.content = compressText(message.content, 300); + compressed.content = compressText(message.content, limit); } else if (Array.isArray(message.content)) { compressed.content = message.content - .map(block => compressContentBlock(block)) + .map(block => compressContentBlock(block, tierName, contextWindowTokens)) .filter(Boolean); } else { compressed.content = message.content; @@ -180,13 +228,12 @@ function compressMessage(message) { /** * Compress tool results in a message while keeping other content * - * Tool results can be very large. This compresses them while - * keeping user and assistant text intact. - * * @param {Object} message - Message to process + * @param {string} tierName - Compression tier + * @param {number} contextWindowTokens - Context window in tokens * @returns {Object} Message with compressed tool results */ -function compressToolResults(message) { +function compressToolResults(message, tierName = "recent", contextWindowTokens = -1) { if (!message) return message; const compressed = { @@ -199,7 +246,7 @@ function compressToolResults(message) { compressed.content = message.content.map(block => { // Compress tool_result blocks if (block.type === 'tool_result') { - return compressToolResultBlock(block); + return compressToolResultBlock(block, tierName, contextWindowTokens); } // Keep other blocks as-is return block; @@ -215,16 +262,20 @@ function compressToolResults(message) { * Compress a content block * * @param {Object} block - Content block + * @param {string} tierName - Compression tier + * @param {number} contextWindowTokens - Context window in tokens * @returns {Object|null} Compressed block or null if removed */ -function compressContentBlock(block) { +function compressContentBlock(block, tierName = "old", contextWindowTokens = -1) { if (!block) return null; + const limit = computeLimit("x".repeat(300), tierName, contextWindowTokens); + switch (block.type) { case 'text': return { type: 'text', - text: compressText(block.text, 300) + text: compressText(block.text, limit) }; case 'tool_use': @@ -237,7 +288,7 @@ function compressContentBlock(block) { }; case 'tool_result': - return compressToolResultBlock(block); + return compressToolResultBlock(block, tierName, contextWindowTokens); default: return block; @@ -247,13 +298,15 @@ function compressContentBlock(block) { /** * Compress tool result block * - * Tool results can be very large (file contents, bash output). - * Compress while preserving essential information. + * Uses dynamic limits based on compression tier and context window size + * instead of a hardcoded character limit. 
 *
 * @param {Object} block - tool_result block
+ * @param {string} tierName - Compression tier
+ * @param {number} contextWindowTokens - Context window in tokens
 * @returns {Object} Compressed tool_result
 */
-function compressToolResultBlock(block) {
+function compressToolResultBlock(block, tierName = "old", contextWindowTokens = -1) {
   if (!block || block.type !== 'tool_result') return block;

   const compressed = {
@@ -261,17 +314,20 @@
     tool_use_id: block.tool_use_id,
   };

-  // Compress content
+  // Compress content using dynamic limits
   if (typeof block.content === 'string') {
-    compressed.content = compressText(block.content, 500);
+    const limit = computeLimit(block.content, tierName, contextWindowTokens);
+    compressed.content = compressText(block.content, limit);
   } else if (Array.isArray(block.content)) {
     compressed.content = block.content.map(item => {
       if (typeof item === 'string') {
-        return compressText(item, 500);
+        const limit = computeLimit(item, tierName, contextWindowTokens);
+        return compressText(item, limit);
       } else if (item.type === 'text') {
+        const limit = computeLimit(item.text || "", tierName, contextWindowTokens);
         return {
           type: 'text',
-          text: compressText(item.text, 500)
+          text: compressText(item.text, limit)
         };
       }
       return item;
@@ -456,4 +512,6 @@
   calculateCompressionStats,
   needsCompression,
   summarizeOldHistory,
+  COMPRESSION_TIERS,
+  computeMaxCap,
 };
diff --git a/src/orchestrator/index.js b/src/orchestrator/index.js
index d553b69..3e07ac2 100644
--- a/src/orchestrator/index.js
+++ b/src/orchestrator/index.js
@@ -10,6 +10,7 @@ const tokens = require("../utils/tokens");
 const systemPrompt = require("../prompts/system");
 const historyCompression = require("../context/compression");
 const tokenBudget = require("../context/budget");
+const { getContextWindow } = require("../providers/context-window");
 const { classifyRequestType, selectToolsSmartly } = require("../tools/smart-selection");
 const { compressMessages: headroomCompress, isEnabled: isHeadroomEnabled } = require("../headroom");
 const { createAuditLogger } = require("../logger/audit-logger");
@@ -669,53 +670,11 @@ function normaliseToolChoice(choice) {
 }

 /**
- * Strip thinking-style reasoning from Ollama model outputs
- * Patterns to remove:
- * - Lines starting with bullet points (●, •, -, *)
- * - Explanatory reasoning before the actual response
- * - Multiple newlines used to separate thinking from response
+ * Strip <think>...</think> tags that some models (DeepSeek, Qwen) emit for chain-of-thought reasoning.
 */
-function stripThinkingBlocks(text) {
+function stripThinkTags(text) {
   if (typeof text !== "string") return text;
-
-  // Split into lines
-  const lines = text.split("\n");
-  const cleanedLines = [];
-  let inThinkingBlock = false;
-  let consecutiveEmptyLines = 0;
-
-  for (const line of lines) {
-    const trimmed = line.trim();
-
-    // Detect thinking block markers (bullet points followed by reasoning)
-    if (/^[●•\-\*]\s/.test(trimmed)) {
-      inThinkingBlock = true;
-      continue;
-    }
-
-    // Empty lines might separate thinking from response
-    if (trimmed === "") {
-      consecutiveEmptyLines++;
-      // If we've seen 2+ empty lines, likely end of thinking block
-      if (consecutiveEmptyLines >= 2) {
-        inThinkingBlock = false;
-      }
-      continue;
-    }
-
-    // Reset empty line counter
-    consecutiveEmptyLines = 0;
-
-    // Skip lines that are part of thinking block
-    if (inThinkingBlock) {
-      continue;
-    }
-
-    // Keep this line
-    cleanedLines.push(line);
-  }
-
-  return cleanedLines.join("\n").trim();
+  return text.replace(/<think>[\s\S]*?<\/think>/g, "").trim();
 }

 function ollamaToAnthropicResponse(ollamaResponse, requestedModel) {
@@ -732,7 +691,7 @@
   // Add text content if present, after stripping thinking blocks
   if (typeof rawContent === "string" && rawContent.trim()) {
-    const cleanedContent = stripThinkingBlocks(rawContent);
+    const cleanedContent = stripThinkTags(rawContent);
     if (cleanedContent) {
       contentItems.push({ type: "text", text: cleanedContent });
     }
@@ -919,6 +878,10 @@ function sanitizePayload(payload) {
       : "claude-opus-4-5";
     clean.model = azureDefaultModel;
   } else if (providerType === "ollama") {
+    // Override client model with Ollama config model
+    const ollamaConfiguredModel = config.ollama?.model;
+    clean.model = ollamaConfiguredModel;
+
     // Ollama format conversion
     // Check if model supports tools
     const { modelNameSupportsTools } = require("../clients/ollama-utils");
@@ -1024,8 +987,15 @@
     }

     // Very short messages (< 20 chars) without code/technical keywords
+    // BUT: Common shell commands should NOT be treated as conversational
+    const shellCommands = /^(pwd|ls|cd|cat|echo|grep|find|ps|top|df|du|whoami|which|env)[\s\.\!\?]*$/;
+    if (shellCommands.test(trimmed)) {
+      logger.info({ matched: "shell_command", trimmed }, "Ollama conversational check - SHELL COMMAND detected, keeping tools");
+      return false; // NOT conversational - needs tools!
+    }
+
     if (trimmed.length < 20 && !/code|file|function|error|bug|fix|write|read|create/.test(trimmed)) {
-      logger.debug({ matched: "short", trimmed, length: trimmed.length }, "Ollama conversational check - matched");
+      logger.warn({ matched: "short", trimmed, length: trimmed.length }, "Ollama conversational check - SHORT MESSAGE matched, DELETING TOOLS");
       return true;
     }
@@ -1035,13 +1005,16 @@
   if (isConversational) {
     // Strip all tools for simple conversational messages
+    const originalToolCount = Array.isArray(clean.tools) ? 
clean.tools.length : 0; delete clean.tools; delete clean.tool_choice; - logger.debug({ + clean._noToolInjection = true; + logger.warn({ model: config.ollama?.model, - message: "Removed tools for conversational message" - }, "Ollama conversational mode"); - } else if (modelSupportsTools && Array.isArray(clean.tools) && clean.tools.length > 0) { + message: "Removed tools for conversational message", + originalToolCount, + userMessage: clean.messages?.[clean.messages.length - 1]?.content?.substring(0, 50), + }, "Ollama conversational mode - ALL TOOLS DELETED!"); } else if (modelSupportsTools && Array.isArray(clean.tools) && clean.tools.length > 0) { // Ollama performance degrades with too many tools // Limit to essential tools only const OLLAMA_ESSENTIAL_TOOLS = new Set([ @@ -1052,7 +1025,8 @@ function sanitizePayload(payload) { "Glob", "Grep", "WebSearch", - "WebFetch" + "WebFetch", + "shell", // Tool is registered as "shell" internally ]); const limitedTools = clean.tools.filter(tool => @@ -1140,6 +1114,9 @@ function sanitizePayload(payload) { } clean.tools = selectedTools.length > 0 ? selectedTools : undefined; + if (!selectedTools.length) { + clean._noToolInjection = true; + } } clean.stream = payload.stream ?? false; @@ -1234,6 +1211,28 @@ function sanitizePayload(payload) { toolCount: clean.tools?.length ?? 0 }, '[CONTEXT_FLOW] After sanitizePayload'); + // === Suggestion mode: tag request and override model if configured === + const { isSuggestionMode: isSuggestion } = detectSuggestionMode(clean.messages); + clean._requestMode = isSuggestion ? "suggestion" : "main"; + const smConfig = config.modelProvider?.suggestionModeModel ?? "default"; + if (isSuggestion && smConfig.toLowerCase() !== "default" && smConfig.toLowerCase() !== "none") { + clean.model = smConfig; + clean._suggestionModeModel = smConfig; + } + + // === Topic detection: tag request and override model if configured === + if (clean._requestMode === "main") { + const { isTopicDetection: isTopic } = detectTopicDetection(clean); + if (isTopic) { + clean._requestMode = "topic"; + const tdConfig = config.modelProvider?.topicDetectionModel ?? 
"default"; + if (tdConfig.toLowerCase() !== "default") { + clean.model = tdConfig; + clean._topicDetectionModel = tdConfig; + } + } + } + return clean; } @@ -1330,9 +1329,12 @@ async function runAgentLoop({ providerType, headers, }) { - console.log('[DEBUG] runAgentLoop ENTERED - providerType:', providerType, 'messages:', cleanPayload.messages?.length); + console.log('[DEBUG] runAgentLoop ENTERED - providerType:', providerType, 'messages:', cleanPayload.messages?.length, 'mode:', cleanPayload._requestMode || 'main', 'model:', cleanPayload.model); logger.info({ providerType, messageCount: cleanPayload.messages?.length }, 'runAgentLoop ENTERED'); const settings = resolveLoopOptions(options); + // Detect context window size for intelligent compression + const contextWindowTokens = await getContextWindow(); + console.log('[DEBUG] Context window detected:', contextWindowTokens, 'tokens for provider:', providerType); // Initialize audit logger (no-op if disabled) const auditLogger = createAuditLogger(config.audit); const start = Date.now(); @@ -1340,8 +1342,22 @@ async function runAgentLoop({ let toolCallsExecuted = 0; let fallbackPerformed = false; const toolCallNames = new Map(); - const toolCallHistory = new Map(); // Track tool calls to detect loops: signature -> count + const toolCallHistory = new Map(); // Track tool calls to detect loops: signature -> counta let loopWarningInjected = false; // Track if we've already warned about loops + let emptyResponseRetried = false; // Track if we've retried after an empty LLM response + + // Log agent loop start + logger.info( + { + sessionId: session?.id ?? null, + model: requestedModel, + maxSteps: settings.maxSteps, + maxDurationMs: settings.maxDurationMs, + wantsThinking, + providerType, + }, + "Agent loop started", + ); while (steps < settings.maxSteps) { if (Date.now() - start > settings.maxDurationMs) { @@ -1378,7 +1394,6 @@ async function runAgentLoop({ } steps += 1; - console.log('[LOOP DEBUG] Entered while loop - step:', steps); logger.debug( { sessionId: session?.id ?? null, @@ -1409,7 +1424,8 @@ async function runAgentLoop({ cleanPayload.messages = historyCompression.compressHistory(originalMessages, { keepRecentTurns: config.historyCompression?.keepRecentTurns ?? 10, summarizeOlder: config.historyCompression?.summarizeOlder ?? true, - enabled: true + enabled: true, + contextWindowTokens, }); if (cleanPayload.messages !== originalMessages) { @@ -1694,8 +1710,88 @@ IMPORTANT TOOL USAGE RULES: }); } - const databricksResponse = await invokeModel(cleanPayload); + // === DEBUG: Log request to LLM === + console.log('\n[LLM REQUEST]', new Date().toISOString(), 'step:', steps, 'model:', cleanPayload.model, 'provider:', providerType, 'mode:', cleanPayload._requestMode || 'main'); + console.log('[LLM REQUEST] messages (' + (cleanPayload.messages?.length ?? 0) + '):'); + for (const m of (cleanPayload.messages || [])) { + const preview = typeof m.content === 'string' + ? m.content.substring(0, 200) + : Array.isArray(m.content) + ? 
m.content.map(b => b.type + ':' + (b.text || b.name || b.tool_use_id || '').substring(0, 80)).join(' | ') + : JSON.stringify(m.content).substring(0, 200); + console.log(' [' + m.role + '] ' + preview); + } + console.log('[LLM REQUEST] tools:', (cleanPayload.tools || []).map(t => t.name || t.function?.name).join(', ') || '(none)'); + console.log('[LLM REQUEST] _noToolInjection:', !!cleanPayload._noToolInjection); + + let databricksResponse; + try { + databricksResponse = await invokeModel(cleanPayload); + } catch (modelError) { + const isConnectionError = modelError.cause?.code === 'ECONNREFUSED' + || modelError.message?.includes('fetch failed') + || modelError.code === 'ECONNREFUSED'; + if (isConnectionError) { + console.error(`[LLM ERROR] ${new Date().toISOString()} Provider ${providerType} is unreachable (connection refused). Is it running?`); + return { + response: { + status: 503, + body: { + error: { + type: "provider_unreachable", + message: `Provider ${providerType} is unreachable. Is the service running?`, + }, + }, + terminationReason: "provider_unreachable", + }, + steps, + durationMs: Date.now() - start, + terminationReason: "provider_unreachable", + }; + } + throw modelError; + } + // === DEBUG: Log response from LLM === + console.log('\n[LLM RESPONSE]', new Date().toISOString(), 'ok:', databricksResponse.ok, 'status:', databricksResponse.status, 'stream:', !!databricksResponse.stream, 'mode:', cleanPayload._requestMode || 'main'); + if (databricksResponse.json) { + const rj = databricksResponse.json; + // Anthropic format + if (rj.content) { + console.log('[LLM RESPONSE] Anthropic format - content blocks:', Array.isArray(rj.content) ? rj.content.length : typeof rj.content); + if (Array.isArray(rj.content)) { + for (const b of rj.content) { + if (b.type === 'text') console.log(' [text] ' + (b.text || '').substring(0, 300)); + else if (b.type === 'tool_use') console.log(' [tool_use] ' + b.name + '(' + JSON.stringify(b.input).substring(0, 200) + ')'); + else console.log(' [' + b.type + ']'); + } + } + console.log('[LLM RESPONSE] stop_reason:', rj.stop_reason); + } + // OpenAI format + if (rj.choices) { + const msg = rj.choices[0]?.message; + console.log('[LLM RESPONSE] OpenAI format - finish_reason:', rj.choices[0]?.finish_reason); + console.log(' [content] ' + (msg?.content || '(null)')); + if (msg?.tool_calls?.length) { + for (const tc of msg.tool_calls) { + console.log(' [tool_call] ' + (tc.function?.name || tc.name) + '(' + (tc.function?.arguments || '') + ')'); + } + } + } + // Ollama format + if (rj.message && !rj.choices && !rj.content) { + console.log('[LLM RESPONSE] Ollama format - done:', rj.done); + console.log(' [content] ' + (rj.message.content || '(empty)')); + if (rj.message.tool_calls?.length) { + for (const tc of rj.message.tool_calls) { + console.log(' [tool_call] ' + (tc.function?.name || tc.name) + '(' + JSON.stringify(tc.function?.arguments || {}) + ')'); + } + } + } + } else { + console.log('[LLM RESPONSE] no json body - raw:', String(databricksResponse.body || '')); + } // Extract and log actual token usage const actualUsage = databricksResponse.ok && config.tokenTracking?.enabled !== false ? tokens.extractUsageFromResponse(databricksResponse.json) @@ -1761,6 +1857,15 @@ IMPORTANT TOOL USAGE RULES: }); } } + logger.info({ + messageContent: databricksResponse.json?.message?.content + ? (typeof databricksResponse.json.message.content === 'string' + ? 
databricksResponse.json.message.content.substring(0, 500) + : JSON.stringify(databricksResponse.json.message.content).substring(0, 500)) + : 'NO_CONTENT', + hasToolCalls: !!databricksResponse.json?.message?.tool_calls, + toolCallCount: databricksResponse.json?.message?.tool_calls?.length || 0 + }, "=== RAW LLM RESPONSE CONTENT ==="); // Handle streaming responses (pass through without buffering) if (databricksResponse.stream) { @@ -1860,11 +1965,13 @@ IMPORTANT TOOL USAGE RULES: _anthropic_block: block, })); - logger.debug( + logger.info( { sessionId: session?.id ?? null, + step: steps, contentBlocks: contentArray.length, toolCallsFound: toolCalls.length, + toolNames: toolCalls.map(tc => tc.function?.name || tc.name), stopReason: databricksResponse.json?.stop_reason, }, "Azure Anthropic response parsed", @@ -1874,13 +1981,182 @@ IMPORTANT TOOL USAGE RULES: const choice = databricksResponse.json?.choices?.[0]; message = choice?.message ?? {}; toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : []; + + // Deduplicate tool calls for OpenAI format too + if (toolCalls.length > 0) { + const uniqueToolCalls = []; + const seenSignatures = new Set(); + let duplicatesRemoved = 0; + + for (const call of toolCalls) { + const signature = getToolCallSignature(call); + if (!seenSignatures.has(signature)) { + seenSignatures.add(signature); + uniqueToolCalls.push(call); + } else { + duplicatesRemoved++; + logger.warn({ + sessionId: session?.id ?? null, + toolName: call.function?.name || call.name, + toolId: call.id, + signature: signature.substring(0, 32), + }, "Duplicate tool call removed (same tool with identical parameters in single response)"); + } + } + + toolCalls = uniqueToolCalls; + + logger.info( + { + sessionId: session?.id ?? null, + step: steps, + toolCallsFound: toolCalls.length, + duplicatesRemoved, + toolNames: toolCalls.map(tc => tc.function?.name || tc.name), + }, + "LLM Response: Tool calls requested (after deduplication)", + ); + } else if (providerType === "ollama") { + // Ollama format: { message: { role, content, tool_calls }, done } + message = databricksResponse.json?.message ?? {}; + toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : []; + + logger.info({ + hasMessage: !!databricksResponse.json?.message, + hasToolCalls: toolCalls.length > 0, + toolCallCount: toolCalls.length, + toolNames: toolCalls.map(tc => tc.function?.name), + done: databricksResponse.json?.done, + fullToolCalls: JSON.stringify(toolCalls), + fullResponseMessage: JSON.stringify(databricksResponse.json?.message) + }, "=== OLLAMA TOOL CALLS EXTRACTION ==="); + } else { + // OpenAI/Databricks format: { choices: [{ message: { tool_calls: [...] } }] } + const choice = databricksResponse.json?.choices?.[0]; + message = choice?.message ?? {}; + toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : []; + + // Deduplicate tool calls for OpenAI format too + if (toolCalls.length > 0) { + const uniqueToolCalls = []; + const seenSignatures = new Set(); + let duplicatesRemoved = 0; + + for (const call of toolCalls) { + const signature = getToolCallSignature(call); + + if (!seenSignatures.has(signature)) { + seenSignatures.add(signature); + uniqueToolCalls.push(call); + } else { + duplicatesRemoved++; + logger.warn({ + sessionId: session?.id ?? 
null, + toolName: call.function?.name || call.name, + toolId: call.id, + signature: signature.substring(0, 32), + }, "Duplicate tool call removed (same tool with identical parameters in single response)"); + } + } + + toolCalls = uniqueToolCalls; + + logger.info( + { + sessionId: session?.id ?? null, + step: steps, + toolCallsFound: toolCalls.length, + duplicatesRemoved, + toolNames: toolCalls.map(tc => tc.function?.name || tc.name), + }, + "LLM Response: Tool calls requested (after deduplication)", + ); + } + } + } + + // === EMPTY RESPONSE DETECTION (primary) === + // Check raw extracted message for empty content before tool handling or conversion + const rawTextContent = (() => { + if (typeof message.content === 'string') return message.content.trim(); + if (Array.isArray(message.content)) { + return message.content + .filter(b => b.type === 'text') + .map(b => b.text || '') + .join('') + .trim(); + } + return ''; + })(); + + if (toolCalls.length === 0 && !rawTextContent) { + console.log('[EMPTY RESPONSE] No text content and no tool calls - step:', steps, 'retried:', emptyResponseRetried); + logger.warn({ + sessionId: session?.id ?? null, + step: steps, + messageKeys: Object.keys(message), + contentType: typeof message.content, + rawContentPreview: String(message.content || '').substring(0, 100), + }, "Empty LLM response detected (no text, no tool calls)"); + + // Retry once with a nudge + if (steps < settings.maxSteps && !emptyResponseRetried) { + emptyResponseRetried = true; + cleanPayload.messages.push({ + role: "assistant", + content: "", + }); + cleanPayload.messages.push({ + role: "user", + content: "Please provide a response to the user's message.", + }); + logger.info({ sessionId: session?.id ?? null }, "Retrying after empty response with nudge"); + continue; + } + + // Fallback after retry also returned empty + logger.warn({ sessionId: session?.id ?? null, steps }, "Empty response persisted after retry"); + return { + response: { + status: 200, + body: { + id: `msg_${Date.now()}`, + type: "message", + role: "assistant", + model: requestedModel, + content: [{ type: "text", text: "I wasn't able to generate a response. Could you try rephrasing your message?" }], + stop_reason: "end_turn", + usage: { input_tokens: 0, output_tokens: 0 }, + }, + terminationReason: "empty_response_fallback", + }, + steps, + durationMs: Date.now() - start, + terminationReason: "empty_response_fallback", + }; + } + + // Guard: drop hallucinated tool calls when no tools were sent to the model. + // Some models (e.g. Llama 3.1) hallucinate tool_call blocks from conversation + // history even when the request contained zero tool definitions. + const toolsWereSent = Array.isArray(cleanPayload.tools) && cleanPayload.tools.length > 0; + if (toolCalls.length > 0 && !toolsWereSent) { + console.log('[HALLUCINATION GUARD] Model returned', toolCalls.length, 'tool call(s) but no tools were offered — ignoring:', toolCalls.map(tc => tc.function?.name || tc.name)); + logger.warn({ + sessionId: session?.id ?? 
null, + step: steps, + hallucinated: toolCalls.map(tc => tc.function?.name || tc.name), + noToolInjection: !!cleanPayload._noToolInjection, + }, "Dropped hallucinated tool calls (no tools were sent to model)"); + toolCalls = []; + // If there's also no text content, treat as empty response (handled below) } if (toolCalls.length > 0) { // Convert OpenAI/OpenRouter format to Anthropic format for session storage let sessionContent; if (providerType === "azure-anthropic") { - // Azure Anthropic already returns content in Anthropic format + // Azure Anthropic already returns content in Anthropic sessionContent = databricksResponse.json?.content ?? []; } else { // Convert OpenAI/OpenRouter format to Anthropic content blocks @@ -2129,6 +2405,7 @@ IMPORTANT TOOL USAGE RULES: session, cwd, requestMessages: cleanPayload.messages, + providerType, })) ); @@ -2175,6 +2452,15 @@ IMPORTANT TOOL USAGE RULES: cleanPayload.messages.push(toolMessage); + logger.info( + { + toolName: execution.name, + content: typeof toolMessage.content === 'string' + ? toolMessage.content.substring(0, 500) + : JSON.stringify(toolMessage.content).substring(0, 500) + }, "Tool result content sent to LLM", + ); + // Convert to Anthropic format for session storage let sessionToolResultContent; if (providerType === "azure-anthropic") { @@ -2362,8 +2648,18 @@ IMPORTANT TOOL USAGE RULES: session, cwd, requestMessages: cleanPayload.messages, + providerType, }); + logger.debug( + { + id: execution.id ?? null, + name: execution.name ?? null, + arguments: execution.arguments ?? null, + content: execution.content ?? null, + is_error: execution.ok === false, + }, "executeToolCall response" ); + let toolMessage; if (providerType === "azure-anthropic") { const parsedContent = parseExecutionContent(execution.content); @@ -2562,7 +2858,16 @@ IMPORTANT TOOL USAGE RULES: } } - continue; + logger.info({ + sessionId: session?.id ?? null, + step: steps, + toolCallsExecuted: toolCallsExecuted, + totalToolCallsInThisStep: toolCalls.length, + messageCount: cleanPayload.messages.length, + lastMessageRole: cleanPayload.messages[cleanPayload.messages.length - 1]?.role, + }, "Tool execution complete"); + + continue; // Loop back to invoke model with tool results in context } let anthropicPayload; @@ -2824,6 +3129,68 @@ IMPORTANT TOOL USAGE RULES: anthropicPayload.content = policy.sanitiseContent(anthropicPayload.content); } + // === EMPTY RESPONSE DETECTION (safety net — post-conversion) === + // Primary detection is earlier (before tool handling). This catches edge cases + // where conversion produces empty content from non-empty raw data. + const hasTextContent = (() => { + if (Array.isArray(anthropicPayload.content)) { + return anthropicPayload.content.some(b => b.type === "text" && b.text?.trim()); + } + if (typeof anthropicPayload.content === "string") { + return anthropicPayload.content.trim().length > 0; + } + return false; + })(); + + const hasToolUseBlocks = Array.isArray(anthropicPayload.content) && + anthropicPayload.content.some(b => b.type === "tool_use"); + + if (!hasToolUseBlocks && !hasTextContent) { + logger.warn({ + sessionId: session?.id ?? null, + step: steps, + messageKeys: Object.keys(anthropicPayload), + contentType: typeof anthropicPayload.content, + contentLength: Array.isArray(anthropicPayload.content) ? 
anthropicPayload.content.length : String(anthropicPayload.content || "").length, + }, "Empty LLM response detected (no text, no tool calls)"); + + // Retry once with a nudge + if (steps < settings.maxSteps && !emptyResponseRetried) { + emptyResponseRetried = true; + cleanPayload.messages.push({ + role: "assistant", + content: "", + }); + cleanPayload.messages.push({ + role: "user", + content: "Please provide a response to the user's message.", + }); + logger.info({ sessionId: session?.id ?? null }, "Retrying after empty response with nudge"); + continue; // Go back to top of while loop + } + + // If retry also returned empty, return a fallback message + logger.warn({ sessionId: session?.id ?? null, steps }, "Empty response persisted after retry"); + return { + response: { + status: 200, + body: { + id: `msg_${Date.now()}`, + type: "message", + role: "assistant", + model: requestedModel, + content: [{ type: "text", text: "I wasn't able to generate a response. Could you try rephrasing your message?" }], + stop_reason: "end_turn", + usage: { input_tokens: 0, output_tokens: 0 }, + }, + terminationReason: "empty_response_fallback", + }, + steps, + durationMs: Date.now() - start, + terminationReason: "empty_response_fallback", + }; + } + // Ensure content is an array before calling .find() const content = Array.isArray(anthropicPayload.content) ? anthropicPayload.content : []; const fallbackCandidate = content.find( @@ -3009,6 +3376,7 @@ IMPORTANT TOOL USAGE RULES: session, cwd, requestMessages: cleanPayload.messages, + providerType, }); const toolResultMessage = createFallbackToolResultMessage(providerType, { @@ -3151,6 +3519,18 @@ IMPORTANT TOOL USAGE RULES: }, "Agent loop completed successfully", ); + + // DIAGNOSTIC: Log response being returned + logger.info({ + sessionId: session?.id ?? null, + status: 200, + hasBody: !!anthropicPayload, + bodyKeys: anthropicPayload ? Object.keys(anthropicPayload) : [], + contentType: anthropicPayload?.content ? (Array.isArray(anthropicPayload.content) ? 'array' : typeof anthropicPayload.content) : 'none', + contentLength: anthropicPayload?.content ? (Array.isArray(anthropicPayload.content) ? anthropicPayload.content.length : String(anthropicPayload.content).length) : 0, + stopReason: anthropicPayload?.stop_reason + }, "=== RETURNING RESPONSE TO CLIENT ==="); + return { response: { status: 200, @@ -3217,6 +3597,114 @@ IMPORTANT TOOL USAGE RULES: }; } +/** + * Detect if the current request is a suggestion mode call. + * Scans the last user message for the [SUGGESTION MODE: marker. + * @param {Array} messages - The conversation messages + * @returns {{ isSuggestionMode: boolean }} + */ +function detectSuggestionMode(messages) { + if (!Array.isArray(messages) || messages.length === 0) { + return { isSuggestionMode: false }; + } + // Scan from the end to find the last user message + for (let i = messages.length - 1; i >= 0; i--) { + const msg = messages[i]; + if (msg?.role !== 'user') continue; + const content = typeof msg.content === 'string' + ? msg.content + : Array.isArray(msg.content) + ? msg.content.map(b => b.text || '').join(' ') + : ''; + if (content.includes('[SUGGESTION MODE:')) { + return { isSuggestionMode: true }; + } + // Only check the last user message + break; + } + return { isSuggestionMode: false }; +} + +/** + * Detect if the current request is a topic detection/classification call. + * These requests typically have a system prompt asking to classify conversation + * topics, with no tools and very short messages. 
+ * They waste GPU time on large models (30-90s just to classify a topic).
+ *
+ * Detection heuristics:
+ * 1. System prompt contains topic classification instructions
+ * 2. No tools in the payload (topic detection never needs tools)
+ * 3. Short message count (typically 1-3 messages)
+ *
+ * @param {Object} payload - The request payload
+ * @returns {{ isTopicDetection: boolean }}
+ */
+function detectTopicDetection(payload) {
+  if (!payload) return { isTopicDetection: false };
+
+  // Topic detection requests have no tools
+  if (Array.isArray(payload.tools) && payload.tools.length > 0) {
+    return { isTopicDetection: false };
+  }
+
+  // Check system prompt for topic classification patterns
+  const systemText = typeof payload.system === 'string'
+    ? payload.system
+    : Array.isArray(payload.system)
+      ? payload.system.map(b => b.text || '').join(' ')
+      : '';
+
+  // Also check first message if system prompt is embedded there
+  let firstMsgText = '';
+  if (Array.isArray(payload.messages) && payload.messages.length > 0) {
+    const first = payload.messages[0];
+    if (first?.role === 'user' || first?.role === 'system') {
+      firstMsgText = typeof first.content === 'string'
+        ? first.content
+        : Array.isArray(first.content)
+          ? first.content.map(b => b.text || '').join(' ')
+          : '';
+    }
+  }
+
+  const combined = systemText + ' ' + firstMsgText;
+  const lc = combined.toLowerCase();
+
+  // Match patterns that Claude Code uses for topic detection
+  const topicPatterns = [
+    'new conversation topic',
+    'topic change',
+    'classify the topic',
+    'classify this message',
+    'conversation topic',
+    'topic classification',
+    'determines the topic',
+    'determine the topic',
+    'categorize the topic',
+    'what topic',
+    'identify the topic',
+  ];
+
+  const hasTopicPattern = topicPatterns.some(p => lc.includes(p));
+
+  if (hasTopicPattern) {
+    return { isTopicDetection: true };
+  }
+
+  // Additional heuristic: very short payload with no tools and system prompt
+  // mentioning "topic" or "classify"
+  if (
+    !payload.tools &&
+    Array.isArray(payload.messages) &&
+    payload.messages.length <= 3 &&
+    (lc.includes('topic') || lc.includes('classify'))
+  ) {
+    return { isTopicDetection: true };
+  }
+
+  return { isTopicDetection: false };
+}
+
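For reviewers, a quick illustration of what the two helpers above match. This is an editorial sketch with made-up payloads, not part of the change; it assumes the message shapes the functions already inspect.

    // Suggestion-mode marker in the last user message.
    detectSuggestionMode([
      { role: "assistant", content: "Done." },
      { role: "user", content: "[SUGGESTION MODE: predict the next user input]" },
    ]); // => { isSuggestionMode: true }

    // Topic-classification prompt, no tools, short payload.
    detectTopicDetection({
      system: "Determine the topic of the conversation and answer with a short label.",
      messages: [{ role: "user", content: "How do I revert a commit?" }],
    }); // => { isTopicDetection: true }

    // A tool-bearing request is never treated as topic detection.
    detectTopicDetection({
      tools: [{ name: "fs_read" }],
      messages: [{ role: "user", content: "Read package.json" }],
    }); // => { isTopicDetection: false }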
"default"; + if (isSuggestionMode && suggestionModelConfig.toLowerCase() === "none") { + console.log('[SUGGESTION MODE] Skipping LLM call (SUGGESTION_MODE_MODEL=none)'); + return { + response: { + body: { + id: `msg_suggestion_skip_${Date.now()}`, + type: "message", + role: "assistant", + content: [{ type: "text", text: "" }], + model: requestedModel, + stop_reason: "end_turn", + stop_sequence: null, + usage: { input_tokens: 0, output_tokens: 0 }, + }, + ok: true, + status: 200, + }, + steps: 0, + durationMs: 0, + terminationReason: "suggestion_mode_skip", + }; + } + // === TOOL LOOP GUARD (EARLY CHECK) === // Check BEFORE sanitization since sanitizePayload removes conversation history const toolLoopThreshold = config.policy?.toolLoopThreshold ?? 3; diff --git a/src/providers/context-window.js b/src/providers/context-window.js new file mode 100644 index 0000000..dcea89d --- /dev/null +++ b/src/providers/context-window.js @@ -0,0 +1,144 @@ +/** + * Context Window Detection + * + * Queries the active provider for its context window size (in tokens). + * Returns -1 if unknown. Caches the result for the lifetime of the process. + */ + +const config = require("../config"); +const logger = require("../logger"); + +// Known context sizes for proprietary models (tokens) +const KNOWN_CONTEXT_SIZES = { + // Anthropic + "claude-3-opus": 200000, + "claude-3-sonnet": 200000, + "claude-3-haiku": 200000, + "claude-3.5-sonnet": 200000, + "claude-4": 200000, + // OpenAI + "gpt-4o": 128000, + "gpt-4o-mini": 128000, + "gpt-4-turbo": 128000, + "gpt-4": 8192, + "gpt-3.5-turbo": 16385, +}; + +// null = not yet detected, -1 = detected but unknown, >0 = known +let cachedContextWindow = null; + +async function detectContextWindow() { + const provider = config.modelProvider.type; + + try { + if (provider === "ollama") { + return await detectOllamaContextWindow(); + } + if (provider === "openrouter") { + return await detectOpenRouterContextWindow(); + } + if (provider === "openai") { + return detectFromKnownSizes(config.openai.model); + } + // azure-anthropic, bedrock — use known Anthropic sizes + if (["azure-anthropic", "bedrock"].includes(provider)) { + return 200000; + } + if (provider === "azure-openai") { + return detectFromKnownSizes(config.azureOpenAI.deployment); + } + if (provider === "llamacpp" || provider === "lmstudio") { + return -1; // No standard API to query + } + if (provider === "zai") { + return 128000; // GLM-4 family + } + if (provider === "vertex") { + return 1000000; // Gemini models + } + } catch (err) { + logger.warn({ err, provider }, "Failed to detect context window"); + } + + return -1; +} + +async function detectOllamaContextWindow() { + const endpoint = `${config.ollama.endpoint}/api/show`; + const response = await fetch(endpoint, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ name: config.ollama.model }), + signal: AbortSignal.timeout(5000), + }); + if (!response.ok) return -1; + const data = await response.json(); + + // Ollama prefixes context_length with the architecture name + // (e.g. 
"llama.context_length", "qwen2.context_length", "gemma.context_length") + // Search for any key ending in ".context_length" or exactly "context_length" + if (data.model_info && typeof data.model_info === "object") { + for (const [key, value] of Object.entries(data.model_info)) { + if (key === "context_length" || key.endsWith(".context_length")) { + if (typeof value === "number" && value > 0) return value; + } + } + } + + // Fallback: parse from parameters string (e.g. "num_ctx 32768") + const match = data.parameters?.match(/num_ctx\s+(\d+)/); + if (match) return parseInt(match[1], 10); + return -1; +} + +async function detectOpenRouterContextWindow() { + const baseEndpoint = config.openrouter.endpoint || "https://openrouter.ai/api/v1/chat/completions"; + // Derive the models endpoint from the chat endpoint + const modelsEndpoint = baseEndpoint.replace(/\/v1\/chat\/completions$/, "/v1/models"); + const response = await fetch(modelsEndpoint, { + headers: { Authorization: `Bearer ${config.openrouter.apiKey}` }, + signal: AbortSignal.timeout(5000), + }); + if (!response.ok) return -1; + const data = await response.json(); + const model = data.data?.find((m) => m.id === config.openrouter.model); + return model?.context_length ?? -1; +} + +function detectFromKnownSizes(modelName) { + if (!modelName) return -1; + const lower = modelName.toLowerCase(); + for (const [key, size] of Object.entries(KNOWN_CONTEXT_SIZES)) { + if (lower.includes(key)) return size; + } + return -1; +} + +async function getContextWindow() { + if (cachedContextWindow !== null) return cachedContextWindow; + cachedContextWindow = await detectContextWindow(); + if (cachedContextWindow === -1) { + logger.warn( + { provider: config.modelProvider.type }, + "Could not detect context window size — falling back to 8K tokens. " + + "Compression may be more aggressive than necessary.", + ); + } else { + logger.info( + { contextWindow: cachedContextWindow, provider: config.modelProvider.type }, + "Context window detected", + ); + } + return cachedContextWindow; +} + +function resetCache() { + cachedContextWindow = null; +} + +module.exports = { + getContextWindow, + detectContextWindow, + resetCache, + KNOWN_CONTEXT_SIZES, +}; diff --git a/src/tools/index.js b/src/tools/index.js index 11227f0..95f4807 100644 --- a/src/tools/index.js +++ b/src/tools/index.js @@ -88,8 +88,43 @@ const TOOL_ALIASES = { runtests: "workspace_test_run", testsummary: "workspace_test_summary", testhistory: "workspace_test_history", + // Glob has dedicated tool in src/tools/indexer.js (registerGlobTool) + // - returns plain text format instead of JSON + // glob: "workspace_list", + // Glob: "workspace_list", }; +/** + * Recursively parse string values that look like JSON arrays/objects. + * Some providers double-serialize nested parameters (e.g. questions: "[{...}]" + * instead of questions: [{...}]), which causes schema validation failures. + */ +function deepParseStringifiedJson(obj) { + if (typeof obj !== "object" || obj === null) return obj; + if (Array.isArray(obj)) return obj.map(deepParseStringifiedJson); + + const result = {}; + for (const [key, value] of Object.entries(obj)) { + if (typeof value === "string") { + const trimmed = value.trim(); + if ( + (trimmed.startsWith("[") && trimmed.endsWith("]")) || + (trimmed.startsWith("{") && trimmed.endsWith("}")) + ) { + try { + result[key] = deepParseStringifiedJson(JSON.parse(trimmed)); + continue; + } catch { + // Not valid JSON, keep as string + } + } + } + result[key] = + typeof value === "object" ? 
diff --git a/src/tools/index.js b/src/tools/index.js
index 11227f0..95f4807 100644
--- a/src/tools/index.js
+++ b/src/tools/index.js
@@ -88,8 +88,43 @@ const TOOL_ALIASES = {
   runtests: "workspace_test_run",
   testsummary: "workspace_test_summary",
   testhistory: "workspace_test_history",
+  // Glob has dedicated tool in src/tools/indexer.js (registerGlobTool)
+  // - returns plain text format instead of JSON
+  // glob: "workspace_list",
+  // Glob: "workspace_list",
 };
 
+/**
+ * Recursively parse string values that look like JSON arrays/objects.
+ * Some providers double-serialize nested parameters (e.g. questions: "[{...}]"
+ * instead of questions: [{...}]), which causes schema validation failures.
+ */
+function deepParseStringifiedJson(obj) {
+  if (typeof obj !== "object" || obj === null) return obj;
+  if (Array.isArray(obj)) return obj.map(deepParseStringifiedJson);
+
+  const result = {};
+  for (const [key, value] of Object.entries(obj)) {
+    if (typeof value === "string") {
+      const trimmed = value.trim();
+      if (
+        (trimmed.startsWith("[") && trimmed.endsWith("]")) ||
+        (trimmed.startsWith("{") && trimmed.endsWith("}"))
+      ) {
+        try {
+          result[key] = deepParseStringifiedJson(JSON.parse(trimmed));
+          continue;
+        } catch {
+          // Not valid JSON, keep as string
+        }
+      }
+    }
+    result[key] =
+      typeof value === "object" ? deepParseStringifiedJson(value) : value;
+  }
+  return result;
+}
+
 function coerceString(value) {
   if (value === undefined || value === null) return "";
   if (typeof value === "string") return value;
@@ -124,24 +159,65 @@ function normalizeHandlerResult(result) {
   return { ok, status, content, metadata };
 }
 
-function parseArguments(call) {
+function parseArguments(call, providerType = null) {
   const raw = call?.function?.arguments;
-  if (typeof raw !== "string" || raw.trim().length === 0) return {};
+
+  // DEBUG: Log full call structure for diagnosis
+  logger.info({
+    providerType,
+    fullCall: JSON.stringify(call),
+    hasFunction: !!call?.function,
+    functionKeys: call?.function ? Object.keys(call.function) : [],
+    argumentsType: typeof raw,
+    argumentsValue: raw,
+    argumentsIsNull: raw === null,
+    argumentsIsUndefined: raw === undefined,
+  }, "=== PARSING TOOL ARGUMENTS ===");
+
+  // Ollama sends arguments as an object, OpenAI as a JSON string
+  if (typeof raw === "object" && raw !== null) {
+    if (providerType !== "ollama") {
+      logger.warn({
+        providerType,
+        expectedProvider: "ollama",
+        argumentsType: typeof raw,
+        arguments: raw
+      }, `Received object arguments but provider is ${providerType || "unknown"}, expected ollama format. Continuing with object.`);
+    } else {
+      logger.info({
+        type: "object",
+        arguments: raw
+      }, "Tool arguments already parsed (Ollama format)");
+    }
+    return deepParseStringifiedJson(raw);
+  }
+
+  if (typeof raw !== "string" || raw.trim().length === 0) {
+    logger.warn({
+      argumentsType: typeof raw,
+      argumentsEmpty: !raw || raw.trim().length === 0,
+      providerType
+    }, "Arguments not a string or empty - returning {}");
+    return {};
+  }
+
   try {
-    return JSON.parse(raw);
+    const parsed = JSON.parse(raw);
+    logger.info({ parsed }, "Parsed JSON string arguments");
+    return deepParseStringifiedJson(parsed);
   } catch (err) {
-    logger.warn({ err }, "Failed to parse tool arguments");
+    logger.warn({ err, raw }, "Failed to parse tool arguments");
     return {};
   }
 }
 
-function normaliseToolCall(call) {
+function normaliseToolCall(call, providerType = null) {
   const name = call?.function?.name ?? call?.name;
   const id = call?.id ?? `${name ?? "tool"}_${Date.now()}`;
   return {
     id,
     name,
-    arguments: parseArguments(call),
+    arguments: parseArguments(call, providerType),
     raw: call,
   };
 }
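To make the provider differences concrete, here is how the same tool call parses through the functions above in both formats. Values are illustrative only; the double-serialized questions field is exactly what deepParseStringifiedJson unwraps.

    // OpenAI-style call: arguments arrive as a JSON string.
    parseArguments(
      { function: { name: "Write", arguments: '{"path":"a.txt","content":"hi"}' } },
      "openai",
    );
    // => { path: "a.txt", content: "hi" }

    // Ollama-style call: arguments arrive as an object, sometimes with a
    // nested value that is itself a JSON string.
    parseArguments(
      { function: { name: "AskUserQuestion", arguments: { questions: '[{"question":"Proceed?"}]' } } },
      "ollama",
    );
    // => { questions: [{ question: "Proceed?" }] }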
"tool"}_${Date.now()}`; return { id, name, - arguments: parseArguments(call), + arguments: parseArguments(call, providerType), raw: call, }; } @@ -182,7 +258,8 @@ function listTools() { } async function executeToolCall(call, context = {}) { - const normalisedCall = normaliseToolCall(call); + const providerType = context?.providerType || context?.provider || null; + const normalisedCall = normaliseToolCall(call, providerType); let registered = registry.get(normalisedCall.name); if (!registered) { const aliasTarget = TOOL_ALIASES[normalisedCall.name.toLowerCase()]; @@ -225,6 +302,10 @@ async function executeToolCall(call, context = {}) { } if (!registered) { + logger.warn({ + tool: normalisedCall.name, + id: normalisedCall.id + }, "Tool not registered"); const content = coerceString({ error: "tool_not_registered", tool: normalisedCall.name, @@ -241,6 +322,17 @@ async function executeToolCall(call, context = {}) { }; } + // Log tool invocation with full details for debugging + logger.info({ + tool: normalisedCall.name, + id: normalisedCall.id, + args: normalisedCall.arguments, + argsKeys: Object.keys(normalisedCall.arguments || {}), + rawCall: JSON.stringify(normalisedCall.raw) + }, "=== EXECUTING TOOL ==="); + + startTime = Date.now() + try { const result = await registered.handler( { @@ -251,11 +343,47 @@ async function executeToolCall(call, context = {}) { }, context, ); - const formatted = normalizeHandlerResult(result); + let formatted = normalizeHandlerResult(result); + + // Auto-approve external file reads: the user already asked to read the file, + // so re-execute transparently with user_approved=true instead of relying + // on the LLM to manage a multi-step approval conversation. + if ( + formatted.content && + typeof formatted.content === "string" && + formatted.content.startsWith("[APPROVAL REQUIRED]") + ) { + logger.info( + { tool: normalisedCall.name, id: normalisedCall.id }, + "Auto-approving external file read (user initiated the request)", + ); + const approvedResult = await registered.handler( + { + id: normalisedCall.id, + name: normalisedCall.name, + args: { ...normalisedCall.arguments, user_approved: true }, + raw: normalisedCall.raw, + }, + context, + ); + formatted = normalizeHandlerResult(approvedResult); + } // Apply tool output truncation for token efficiency const truncatedContent = truncateToolOutput(normalisedCall.name, formatted.content); + const durationMs = Date.now() - startTime; + + // Log successful execution + logger.info({ + tool: normalisedCall.name, + id: normalisedCall.id, + status: formatted.status, + durationMs, + outputLength: truncatedContent?.length || 0, + truncated: truncatedContent !== formatted.content + }, "Tool execution completed"); + return { id: normalisedCall.id, name: normalisedCall.name, @@ -267,11 +395,20 @@ async function executeToolCall(call, context = {}) { registered: true, truncated: truncatedContent !== formatted.content, originalLength: formatted.content?.length, - truncatedLength: truncatedContent?.length + truncatedLength: truncatedContent?.length, + durationMs }, }; } catch (err) { - logger.error({ err, tool: normalisedCall.name }, "Tool execution failed"); + const durationMs = Date.now() - startTime; + + logger.error({ + err, + tool: normalisedCall.name, + id: normalisedCall.id, + durationMs + }, "Tool execution failed"); + return { id: normalisedCall.id, name: normalisedCall.name, @@ -286,6 +423,7 @@ async function executeToolCall(call, context = {}) { metadata: { registered: true, error: true, + durationMs }, error: err, 
}; diff --git a/src/tools/indexer.js b/src/tools/indexer.js index eb0a981..bf13ca8 100644 --- a/src/tools/indexer.js +++ b/src/tools/indexer.js @@ -16,11 +16,13 @@ function registerWorkspaceListTool() { registerTool( "workspace_list", async ({ args = {} }) => { + // Support both 'pattern' (Glob tool) and 'patterns' (workspace_list) + const rawPatterns = args.pattern ?? args.patterns; const patterns = - typeof args.patterns === "string" - ? [args.patterns] - : Array.isArray(args.patterns) - ? args.patterns + typeof rawPatterns === "string" + ? [rawPatterns] + : Array.isArray(rawPatterns) + ? rawPatterns : undefined; const ignore = typeof args.ignore === "string" @@ -53,10 +55,62 @@ function registerWorkspaceListTool() { ); } +/** + * Search recent conversation context for content matching a query. + * + * Scans the last 10 messages for tool_result content that matches + * the query words. Returns matches sorted by relevance. + * + * @param {string} query - Search query + * @param {Array} messages - Recent conversation messages + * @returns {Array} Matching context snippets + */ +function searchRecentContext(query, messages) { + if (!query || !messages || !Array.isArray(messages)) return []; + + const queryLower = query.toLowerCase(); + const queryWords = queryLower.split(/\s+/).filter((w) => w.length > 2); + if (queryWords.length === 0) return []; + + const matches = []; + + // Scan last 10 messages for tool_result content + const recent = messages.slice(-10); + for (const msg of recent) { + if (msg.role !== "tool" && msg.role !== "user") continue; + + const content = + typeof msg.content === "string" + ? msg.content + : Array.isArray(msg.content) + ? msg.content + .filter((b) => b.type === "tool_result" || b.type === "text") + .map((b) => b.content ?? b.text ?? "") + .join("\n") + : ""; + + if (!content || content.length < 20) continue; + + // Check if any query words appear in the content + const contentLower = content.toLowerCase(); + const matchCount = queryWords.filter((w) => contentLower.includes(w)).length; + + if (matchCount > 0 && matchCount / queryWords.length >= 0.3) { + matches.push({ + source: "conversation_context", + relevance: matchCount / queryWords.length, + preview: content.substring(0, 500), + }); + } + } + + return matches.sort((a, b) => b.relevance - a.relevance).slice(0, 3); +} + function registerWorkspaceSearchTool() { registerTool( "workspace_search", - async ({ args = {} }) => { + async ({ args = {} }, context = {}) => { const query = args.query ?? args.term ?? args.pattern; const regex = args.regex === true || args.is_regex === true; const limit = Number.isInteger(args.limit) ? args.limit : undefined; @@ -67,6 +121,9 @@ function registerWorkspaceSearchTool() { ? args.ignore : undefined; + // Check recent conversation context for matching content + const contextMatches = searchRecentContext(query, context.requestMessages); + const result = await searchWorkspace({ query, regex, @@ -74,12 +131,21 @@ function registerWorkspaceSearchTool() { ignore, }); + // Prepend context matches if found + if (contextMatches.length > 0) { + result.context_matches = contextMatches; + result.note = + "Results from recently read files are listed in context_matches. 
" + + "Prefer these over workspace matches when answering about previously read content."; + } + return { ok: true, status: 200, content: JSON.stringify(result, null, 2), metadata: { total: result.matches.length, + contextTotal: contextMatches.length, engine: result.engine, }, }; @@ -260,6 +326,45 @@ function registerSymbolReferencesTool() { ); } + +/** + * Dedicated Glob tool for Claude Code compatibility (maybe others?). + * + * Why this exists (instead of using workspace_list alias): + * - Claude Code's Glob tool returns plain text (one path per line) + * - workspace_list returns JSON with entries array + * - Models expect plain text format from Glob tool + * + * See also: TOOL_ALIASES in src/tools/index.js (commented glob entries) + */ +function registerGlobTool() { + registerTool( + "Glob", + async ({ args = {} }) => { + const pattern = args.pattern; + const basePath = args.path; + + let patterns; + if (basePath) { + const cleanPath = basePath.replace(/\/+$/, ""); + patterns = pattern ? [`${cleanPath}/${pattern}`] : [`${cleanPath}/**/*`]; + } else { + patterns = pattern ? [pattern] : undefined; + } + + const entries = await listWorkspaceFiles({ patterns, limit: 1000 }); + + // Plain text output: one path per line (Claude Code format) + return { + ok: true, + status: 200, + content: entries.map((e) => e.path).join("\n"), + }; + }, + { category: "indexing" }, + ); +} + function registerGotoDefinitionTool() { registerTool( "workspace_goto_definition", @@ -353,6 +458,7 @@ function registerIndexerTools() { registerSymbolSearchTool(); registerSymbolReferencesTool(); registerGotoDefinitionTool(); + registerGlobTool(); } module.exports = { diff --git a/src/tools/stubs.js b/src/tools/stubs.js index c026e8e..d2f1bd3 100644 --- a/src/tools/stubs.js +++ b/src/tools/stubs.js @@ -41,12 +41,41 @@ function createStubHandler(name, description) { }); } +function askUserQuestionHandler({ args }) { + let questions = args?.questions ?? []; + + if (typeof questions === "string") { + try { questions = JSON.parse(questions); } catch { questions = []; } + } + + if (!Array.isArray(questions)) questions = [questions]; + const lines = questions.map((q, i) => { + const header = q.header ? `[${q.header}] ` : ""; + const opts = (q.options ?? []) + .map((o, j) => ` ${j + 1}. ${o.label} — ${o.description}`) + .join("\n"); + return `${header}${q.question}\n${opts}`; + }); + + return { + ok: true, + status: 200, + content: lines.join("\n\n"), + }; +} + function registerStubTools() { STUB_TOOLS.forEach((tool) => { if (!hasTool(tool.name)) { registerTool(tool.name, createStubHandler(tool.name, tool.description), tool); } }); + + if (!hasTool("AskUserQuestion")) { + registerTool("AskUserQuestion", askUserQuestionHandler, { + description: "Returns the model's question to the user as assistant output.", + }); + } } module.exports = { diff --git a/src/tools/workspace.js b/src/tools/workspace.js index 144c6c1..37933ae 100644 --- a/src/tools/workspace.js +++ b/src/tools/workspace.js @@ -1,8 +1,12 @@ +const path = require("path"); const { readFile, writeFile, applyFilePatch, resolveWorkspacePath, + expandTilde, + isExternalPath, + readExternalFile, fileExists, workspaceRoot, } = require("../workspace"); @@ -30,17 +34,40 @@ function registerWorkspaceTools() { registerTool( "fs_read", async ({ args = {} }) => { - const relativePath = validateString(args.path ?? args.file, "path"); + const targetPath = validateString(args.path ?? args.file ?? 
args.file_path, "path");
       const encoding = normalizeEncoding(args.encoding);
-      const content = await readFile(relativePath, encoding);
+
+      // Check if path is outside workspace
+      if (isExternalPath(targetPath)) {
+        if (args.user_approved !== true) {
+          const expanded = expandTilde(targetPath);
+          const resolved = path.resolve(expanded);
+          return {
+            ok: true,
+            status: 200,
+            content: `[APPROVAL REQUIRED] The file "${resolved}" is outside the workspace and cannot be read without user permission.\n\nYou must now ask the user: "The file ${resolved} is outside the workspace. May I read it?"\n\nIf the user says yes, call the Read tool again with file_path="${targetPath}" and user_approved=true.`,
+          };
+        }
+        // User approved — read external file
+        const { content, resolvedPath } = await readExternalFile(targetPath, encoding);
+        return {
+          ok: true,
+          status: 200,
+          content,
+          metadata: { path: targetPath, encoding, resolved_path: resolvedPath },
+        };
+      }
+
+      // Normal workspace read (unchanged)
+      const content = await readFile(targetPath, encoding);
       return {
         ok: true,
         status: 200,
         content,
         metadata: {
-          path: relativePath,
+          path: targetPath,
           encoding,
-          resolved_path: resolveWorkspacePath(relativePath),
+          resolved_path: resolveWorkspacePath(targetPath),
         },
       };
     },
@@ -114,7 +141,7 @@ function registerWorkspaceTools() {
   registerTool(
     "edit_patch",
     async ({ args = {} }, context = {}) => {
-      const relativePath = validateString(args.path ?? args.file, "path");
+      const relativePath = validateString(args.path ?? args.file ?? args.file_path, "path");
       const patch = validateString(args.patch, "patch");
       const encoding = normalizeEncoding(args.encoding);
 
diff --git a/src/workspace/index.js b/src/workspace/index.js
index da1a7e0..6cc058a 100644
--- a/src/workspace/index.js
+++ b/src/workspace/index.js
@@ -10,6 +10,33 @@ if (!fs.existsSync(workspaceRoot)) {
   fs.mkdirSync(workspaceRoot, { recursive: true });
 }
 
+function expandTilde(targetPath) {
+  if (typeof targetPath !== "string") return targetPath;
+  if (targetPath.startsWith("~")) {
+    const home = process.env.HOME || process.env.USERPROFILE;
+    if (home) {
+      return path.join(home, targetPath.slice(1));
+    }
+  }
+  return targetPath;
+}
+
+function isExternalPath(targetPath) {
+  const expanded = expandTilde(targetPath);
+  const relative = path.relative(workspaceRoot, path.resolve(workspaceRoot, expanded));
+  return relative.startsWith("..") || path.isAbsolute(relative);
+}
+
+async function readExternalFile(targetPath, encoding = "utf8") {
+  const expanded = expandTilde(targetPath);
+  const resolved = path.resolve(expanded);
+  const stats = await fsp.stat(resolved);
+  if (!stats.isFile()) {
+    throw new Error("Requested path is not a file.");
+  }
+  return { content: await fsp.readFile(resolved, { encoding }), resolvedPath: resolved };
+}
+
 function resolveWorkspacePath(targetPath) {
   if (!targetPath || typeof targetPath !== "string") {
     throw new Error("Path must be a non-empty string.");
@@ -110,6 +137,9 @@ function validateCwd(cwd) {
 module.exports = {
   workspaceRoot,
   resolveWorkspacePath,
+  expandTilde,
+  isExternalPath,
+  readExternalFile,
   readFile,
   writeFile,
   fileExists,