Fast-Editor · MichaelAnders · Feb 5, 2026 · Feb 6, 2026 · Feb 8, 2026 · Feb 9, 2026
diff --git a/.env.example b/.env.example
@@ -212,6 +212,14 @@ WORKSPACE_INDEX_ENABLED=true
 # - client/passthrough: Return tool calls to CLI for local execution
 TOOL_EXECUTION_MODE=server
 
+# Suggestion mode model override
+# Controls which model handles suggestion mode (predicting next user input).
+# Values:
+#   default - Use the same model as MODEL_PROVIDER (no change)
+#   none    - Skip suggestion mode LLM calls entirely (saves GPU time)
+#   <model> - Use a specific model (e.g. "llama3.1" for a lighter model)
+SUGGESTION_MODE_MODEL=default
+
 # Enable/disable automatic tool injection for local models
 INJECT_TOOLS_LLAMACPP=true
 INJECT_TOOLS_OLLAMA=true

diff --git a/src/api/middleware/logging.js b/src/api/middleware/logging.js
@@ -12,26 +12,92 @@
   return clone;
 }
 
-const loggingMiddleware = pinoHttp({
+const baseLoggingMiddleware = pinoHttp({
   logger,
-  customProps: (req) => ({
+  autoLogging: false, // Disable automatic logging so we can log manually with bodies
+  customProps: (req, res) => ({
     sessionId: req.sessionId ?? null,
   }),
-  customLogLevel: (req, res, err) => {
-    if (err || res.statusCode >= 500) return "error";
-    if (res.statusCode >= 400) return "warn";
-    return "info";
-  },
-  wrapSerializers: true,
-  serializers: {
-    req(req) {
-      return {
+});
+
+// Wrapper middleware to capture and log full request/response bodies
+function loggingMiddleware(req, res, next) {
+  const startTime = Date.now();
+
+  // Log request with full body immediately
+  logger.info({
+    sessionId: req.sessionId ?? null,
+    req: {
+      method: req.method,
+      url: req.url,
+      headers: maskHeaders(req.headers),
+    },
+    requestBody: req.body, // Full request body without truncation
+  }, 'request started');
+
+  // Intercept res.write for streaming responses
+  const originalWrite = res.write;
+  const chunks = [];
+  res.write = function (chunk) {
+    if (chunk) {
+      chunks.push(Buffer.from(chunk));
+    }
+    return originalWrite.apply(this, arguments);
+  };
+
+  // Intercept res.send to capture the body
+  const originalSend = res.send;
+  res.send = function (body) {
+    res._capturedBody = body;
+
+    // Parse if it's a JSON string for better logging
+    if (typeof body === 'string') {
+      try {
+        res._capturedBody = JSON.parse(body);
+      } catch (e) {
+        res._capturedBody = body;
+      }
+    }
+
+    return originalSend.call(this, body);
+  };
+
+  // Log response when finished
+  res.on('finish', () => {
+    const responseTime = Date.now() - startTime;
+
+    // Capture streaming body if not already captured via send()
+    if (chunks.length > 0 && !res._capturedBody) {
+      const fullBody = Buffer.concat(chunks).toString('utf8');
+      res._capturedBody = {
+        type: 'stream',
+        contentType: res.getHeader('content-type'),
+        size: fullBody.length,
+        preview: fullBody.substring(0, 1000)
+      };
+    }
+
+    const logLevel = res.statusCode >= 500 ? 'error' : res.statusCode >= 400 ? 'warn' : 'info';
+
+    logger[logLevel]({
+      sessionId: req.sessionId ?? null,
+      req: {
         method: req.method,
         url: req.url,
         headers: maskHeaders(req.headers),
-      };
-    },
-  },
-});
+      },
+      res: {
+        statusCode: res.statusCode,
+        headers: res.getHeaders ? res.getHeaders() : res.headers,
+      },
+      requestBody: req.body, // Full request body without truncation
+      responseBody: res._capturedBody, // Full response body without truncation
+      responseTime,
+    }, 'request completed');
+  });
+
+  // Still call base middleware to set up req.log
+  baseLoggingMiddleware(req, res, next);
+}
 
 module.exports = loggingMiddleware;
diff --git a/src/api/middleware/request-logging.js b/src/api/middleware/request-logging.js
@@ -25,13 +25,14 @@ function requestLoggingMiddleware(req, res, next) {
   // Add to response headers
   res.setHeader("X-Request-ID", requestId);
 
-  // Log request start
+// Log request start with full body
   logger.info(
     {
       requestId,
       method: req.method,
       path: req.path || req.url,
       query: req.query,
+      body: req.body, // Full request body without truncation
       ip: req.ip || req.socket.remoteAddress,
       userAgent: req.headers["user-agent"],
     },
@@ -43,7 +44,18 @@ function requestLoggingMiddleware(req, res, next) {
   res.send = function (body) {
     const duration = Date.now() - startTime;
 
-    // Log request completion
+    // Parse body if it's a string
+    let responseBody = body;
+    if (typeof body === 'string') {
+      try {
+        responseBody = JSON.parse(body);
+      } catch (e) {
+        // Keep as string if not JSON
+        responseBody = body;
+      }
+    }
+
+    // Log request completion with full request and response bodies
     logger.info(
       {
         requestId,
@@ -52,6 +64,8 @@ function requestLoggingMiddleware(req, res, next) {
         status: res.statusCode,
         duration,
         contentLength: res.getHeader("content-length"),
+        requestBody: req.body, // Full request body for reference
+        responseBody, // Full response body without truncation
       },
       "Request completed"
     );

diff --git a/src/api/router.js b/src/api/router.js
@@ -7,6 +7,7 @@ const openaiRouter = require("./openai-router");
 const providersRouter = require("./providers-handler");
 const { getRoutingHeaders, getRoutingStats, analyzeComplexity } = require("../routing");
 const { validateCwd } = require("../workspace");
+const logger = require("../logger");
 
 const router = express.Router();
 
@@ -121,6 +122,13 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
     const wantsStream = Boolean(req.query?.stream === 'true' || req.body?.stream);
     const hasTools = Array.isArray(req.body?.tools) && req.body.tools.length > 0;
 
+    logger.info({
+      sessionId: req.headers['x-claude-session-id'],
+      wantsStream,
+      hasTools,
+      willUseStreamingPath: wantsStream || hasTools
+    }, "=== REQUEST ROUTING DECISION ===");
+
     // Analyze complexity for routing headers (Phase 3)
     const complexity = analyzeComplexity(req.body);
     const routingHeaders = getRoutingHeaders({
@@ -338,6 +346,13 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
 
     // Legacy streaming wrapper (for tool-based requests that requested streaming)
     if (wantsStream && hasTools) {
+      logger.info({
+        sessionId: req.headers['x-claude-session-id'],
+        pathType: 'legacy_streaming_wrapper',
+        wantsStream,
+        hasTools
+      }, "=== USING LEGACY STREAMING WRAPPER (TOOL-BASED WITH STREAMING) ===");
+
       metrics.recordStreamingStart();
       res.set({
         "Content-Type": "text/event-stream",
@@ -359,6 +374,13 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
       // Use proper Anthropic SSE format
       const msg = result.body;
 
+      logger.info({
+        sessionId: req.headers['x-claude-session-id'],
+        eventType: 'message_start',
+        streamingWithTools: true,
+        hasContent: !!(msg.content && msg.content.length > 0)
+      }, "=== SENDING SSE MESSAGE_START ===");
+
       // 1. message_start
       res.write(`event: message_start\n`);
       res.write(`data: ${JSON.stringify({
@@ -419,9 +441,52 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
 
           res.write(`event: content_block_stop\n`);
           res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`);
+        } else if (block.type === "tool_result") {
+          // === TOOL_RESULT SSE STREAMING - ENTERED ===
+          logger.info({
+            blockIndex: i,
+            blockType: block.type,
+            toolUseId: block.tool_use_id,
+            contentType: typeof block.content,
+            contentLength: typeof block.content === 'string' ? block.content.length : JSON.stringify(block.content).length
+          }, "=== SSE: STREAMING TOOL_RESULT BLOCK - START ===");
+
+          // Stream tool_result blocks so CLI can display actual tool output
+          res.write(`event: content_block_start\n`);
+          res.write(`data: ${JSON.stringify({
+            type: "content_block_start",
+            index: i,
+            content_block: { type: "tool_result", tool_use_id: block.tool_use_id, content: "" }
+          })}\n\n`);
+
+          // Stream the actual content
+          const content = typeof block.content === 'string'
+            ? block.content
+            : JSON.stringify(block.content);
+
+          logger.info({
+            blockIndex: i,
+            contentLength: content.length,
+            contentPreview: content.substring(0, 200)
+          }, "=== SSE: STREAMING TOOL_RESULT CONTENT ===");
+
+          res.write(`event: content_block_delta\n`);
+          res.write(`data: ${JSON.stringify({
+            type: "content_block_delta",
+            index: i,
+            delta: { type: "tool_result_delta", content: content }
+          })}\n\n`);
+
+          res.write(`event: content_block_stop\n`);
+          res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`);
+
+          // === TOOL_RESULT SSE STREAMING - COMPLETED ===
+          logger.info({
+            blockIndex: i,
+            toolUseId: block.tool_use_id
+          }, "=== SSE: STREAMING TOOL_RESULT BLOCK - END ===");
         }
       }
-
       // 3. message_delta with stop_reason
       res.write(`event: message_delta\n`);
       res.write(`data: ${JSON.stringify({
@@ -454,6 +519,16 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
       });
     }
 
+
+    // DIAGNOSTIC: Log response being sent to client
+    logger.info({
+      status: result.status,
+      hasBody: !!result.body,
+      bodyKeys: result.body ? Object.keys(result.body) : [],
+      bodyType: typeof result.body,
+      contentLength: result.body ? JSON.stringify(result.body).length : 0
+    }, "=== SENDING RESPONSE TO CLIENT ===");
+
     metrics.recordResponse(result.status);
     res.status(result.status).send(result.body);
   } catch (error) {