CambrianTech · joelteply · May 25, 2026 · May 25, 2026
diff --git a/src/commands/ai/generate/server/AIGenerateServerCommand.ts b/src/commands/ai/generate/server/AIGenerateServerCommand.ts
@@ -1,11 +1,25 @@
 /**
- * AI Generate Command - Server Implementation
- * ============================================
+ * AI Generate Command - Server Implementation (thin shim)
+ * =======================================================
  *
- * Server-side AI generation with RAG context building
- * All database access and LLM calls happen here
+ * Rust owns response generation: prompt assembly (system prompt +
+ * history + time prefixes + hour-gap markers + identity reminder),
+ * provider selection, admission gating, timeout, and token-usage
+ * stamping all live in `cognition/generate_response.rs`. This shim:
+ *
+ *   1. Builds the RAG context server-side (still TS — the
+ *      `ChatRAGBuilder` factory + entity reads have not been ported
+ *      to Rust yet; tracked separately).
+ *   2. Adapts the RAG context onto `AIDecisionContext` and hands off
+ *      to `AIDecisionService.generateResponse`, which is the proven
+ *      IPC seam already used by PersonaUser's response path.
+ *   3. Translates the Rust result back to `AIGenerateResult`.
+ *
+ * Direct-message and preview modes remain TS-side because they are
+ * introspection/test paths that bypass admission and provider
+ * selection — Rust intentionally does not expose a "skip the gate"
+ * code path.
  */
-
 import { AIGenerateCommand } from '../shared/AIGenerateCommand';
 import type { JTAGContext } from '../../../../system/core/types/JTAGTypes';
 import type { ICommandDaemon } from '../../../../daemons/command-daemon/shared/CommandBase';
@@ -14,13 +28,12 @@ import { paramsToRequest, responseToResult, createErrorResult, createAIGenerateR
 import { AIProviderDaemon } from '../../../../daemons/ai-provider-daemon/shared/AIProviderDaemon';
 import { RAGBuilderFactory } from '../../../../system/rag/shared/RAGBuilder';
 import { getContextWindow, getInferenceSpeed } from '../../../../system/shared/ModelContextWindows';
-import type { RAGContext } from '../../../../system/rag/shared/RAGTypes';
 import { ChatRAGBuilder } from '../../../../system/rag/builders/ChatRAGBuilder';
 import { ORM } from '../../../../daemons/data-daemon/server/ORM';
 import { UserEntity } from '../../../../system/data/entities/UserEntity';
+import { ChatMessageEntity } from '../../../../system/data/entities/ChatMessageEntity';
 import type { TextGenerationRequest } from '../../../../daemons/ai-provider-daemon/shared/AIProviderTypesV2';
-import { SystemPaths } from '../../../../system/core/config/SystemPaths';
-import { LOCAL_MODELS } from '../../../../system/shared/Constants';
+import { AIDecisionService, type AIDecisionContext } from '../../../../system/ai/server/AIDecisionService';
 
 export class AIGenerateServerCommand extends AIGenerateCommand {
   constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) {
@@ -34,16 +47,11 @@ export class AIGenerateServerCommand extends AIGenerateCommand {
 
   async execute(params: AIGenerateParams): Promise<AIGenerateResult> {
     try {
-      let request: TextGenerationRequest;
-      let ragContext: RAGContext | undefined = undefined;
-
-      // Mode selection: RAG context building OR direct messages
+      // RAG MODE: build context, delegate to Rust generate-response
       if (params.roomId) {
-        // RAG MODE: Build context from chat room (SAME code path as PersonaUser)
-
         // Find persona if not specified
         let targetPersonaId = params.personaId;
-        let personaDisplayName = 'ai-generate-command'; // Fallback name for tracking
+        let personaDisplayName = 'ai-generate-command';
         if (!targetPersonaId) {
           const usersResult = await ORM.query<UserEntity>({
             collection: UserEntity.collection,
@@ -60,9 +68,8 @@ export class AIGenerateServerCommand extends AIGenerateCommand {
           personaDisplayName = personaRecord.data.displayName;
         }
 
-        // Build RAG context (SAME code as PersonaUser.respondToMessage line 207-215)
         const ragBuilder = RAGBuilderFactory.getBuilder('chat');
-        ragContext = await ragBuilder.buildContext(
+        const ragContext = await ragBuilder.buildContext(
           params.roomId,
           targetPersonaId,
           {
@@ -78,100 +85,152 @@ export class AIGenerateServerCommand extends AIGenerateCommand {
           }
         );
 
-        // Convert to messages array with timestamps + gaps (SAME as PersonaUser.ts:376-415)
-        const messages: TextGenerationRequest['messages'] = [];
-        messages.push({
-          role: 'system',
-          content: ragContext.identity.systemPrompt
-        });
-
-        // Add conversation history with timestamp formatting + gap detection
-        let lastTimestamp: number | undefined;
-        for (const msg of ragContext.conversationHistory) {
-          let timePrefix = '';
-          if (msg.timestamp) {
-            const date = new Date(msg.timestamp);
-            const hours = date.getHours().toString().padStart(2, '0');
-            const minutes = date.getMinutes().toString().padStart(2, '0');
-            timePrefix = `[${hours}:${minutes}] `;
-
-            // Detect significant time gaps (> 1 hour)
-            if (lastTimestamp && (msg.timestamp - lastTimestamp > 3600000)) {
-              const gapHours = Math.floor((msg.timestamp - lastTimestamp) / 3600000);
-              messages.push({
-                role: 'system',
-                content: `⏱️ ${gapHours} hour${gapHours > 1 ? 's' : ''} passed - conversation resumed`
-              });
-            }
-            lastTimestamp = msg.timestamp;
-          }
-
-          messages.push({
-            role: msg.role,
-            content: msg.name ? `${timePrefix}${msg.name}: ${msg.content}` : `${timePrefix}${msg.content}`
+        // PREVIEW MODE: reconstruct the request Rust would build (best-effort
+        // mirror; the source of truth is `build_response_generation_request`
+        // in cognition/generate_response.rs). Returns without inference.
+        if (params.preview) {
+          const previewRequest = this.previewRequestFromRag(params, ragContext, targetPersonaId, personaDisplayName);
+          const formatted = this.formatRequestPreview(previewRequest, ragContext);
+          return createAIGenerateResultFromParams(params, {
+            success: true,
+            preview: true,
+            request: previewRequest,
+            formatted,
+            ragContext: ragContext as unknown as Record<string, unknown>
           });
         }
 
-        // Identity reminder with current time
-        const now = new Date();
-        const currentTime = `${now.toLocaleDateString('en-US', { month: '2-digit', day: '2-digit', year: 'numeric' })} ${now.toLocaleTimeString('en-US', { hour: '2-digit', minute: '2-digit', hour12: false })}`;
-        messages.push({
-          role: 'system',
-          content: `IDENTITY REMINDER: You are ${ragContext.identity.name}. Respond naturally with JUST your message - NO name prefix.\n\nCURRENT TIME: ${currentTime}\n\nIMPORTANT: Pay attention to timestamps [HH:MM]. If messages are from hours ago but current question is recent, topic likely changed. Focus on MOST RECENT message.`
-        });
-
-        // Build request with personaContext for proper logging and routing
-        request = {
-          messages,
-          model: params.model || LOCAL_MODELS.DEFAULT,
-          temperature: params.temperature ?? 0.7,
-          maxTokens: params.maxTokens ?? 150,
-          // Default to 'local' (DMR via Rust IPC), NEVER a cloud provider.
-          // Continuum's architectural point is local models; cloud providers
-          // are opt-in via explicit --provider, not silent fallback. Pre-fix
-          // the default was 'candle' which is misleading (Candle is a
-          // training framework, not inference) and Rust's routing for an
-          // unknown provider could pick a registered cloud adapter (Carl's
-          // #980 Bug 7: silent DeepSeek 401 with no key configured). 'local'
-          // explicitly routes to Rust→DMR; if DMR isn't running, Rust
-          // hard-fails with an actionable error instead of silently falling
-          // through to a cloud provider that requires a key the user never
-          // set. Joel: "deepseek can't be a fallback" / "whole point is
-          // local models, make them work."
-          provider: params.provider || 'local',
-          personaContext: {
-            uniqueId: targetPersonaId,
-            displayName: ragContext.identity?.name || personaDisplayName,
-            logDir: SystemPaths.personas.dir(targetPersonaId)
-          }
+        // Adapt onto AIDecisionContext for the Rust shim.
+        // triggerMessage is the latest history entry — Rust uses it for
+        // the admission lease/artifact key, not for prompt content.
+        const history = ragContext.conversationHistory;
+        const triggerMessage = this.synthesizeTriggerMessage(history, params.roomId);
+        const decisionContext: AIDecisionContext = {
+          personaId: targetPersonaId,
+          personaName: ragContext.identity?.name || personaDisplayName,
+          roomId: params.roomId,
+          triggerMessage,
+          ragContext,
+          systemPrompt: ragContext.identity.systemPrompt,
         };
 
-      } else if (params.messages) {
-        // DIRECT MODE: Use provided messages
-        request = paramsToRequest(params);
-
-      } else {
-        return createErrorResult(params, 'Either roomId or messages must be provided');
-      }
-
-      // PREVIEW MODE: Return request without calling LLM
-      if (params.preview) {
-        const formatted = this.formatRequestPreview(request, ragContext);
+        const generation = await AIDecisionService.generateResponse(decisionContext, {
+          model: params.model,
+          temperature: params.temperature,
+          maxTokens: params.maxTokens,
+        });
 
         return createAIGenerateResultFromParams(params, {
           success: true,
-          preview: true,
-          request,
-          formatted,
-          ragContext: ragContext as unknown as Record<string, unknown>
+          text: generation.text,
+          model: generation.model,
+          provider: params.provider || 'local',
+          responseTimeMs: generation.responseTime,
+          requestId: undefined,
+          usage: generation.tokensUsed
+            ? {
+                inputTokens: generation.tokensUsed.input,
+                outputTokens: generation.tokensUsed.output,
+                totalTokens: generation.tokensUsed.total,
+              }
+            : undefined,
         });
       }
 
-      // GENERATION MODE: Call AIProviderDaemon
-      const response = await AIProviderDaemon.generateText(request);
-      return responseToResult(response, params);
+      // DIRECT MODE: pass-through to AIProviderDaemon. No admission gate
+      // here — direct mode is a test/introspection path; production
+      // traffic comes through RAG mode above.
+      if (params.messages) {
+        const request: TextGenerationRequest = paramsToRequest(params);
+
+        if (params.preview) {
+          const formatted = this.formatRequestPreview(request, undefined);
+          return createAIGenerateResultFromParams(params, {
+            success: true,
+            preview: true,
+            request,
+            formatted,
+            ragContext: undefined
+          });
+        }
+
+        const response = await AIProviderDaemon.generateText(request);
+        return responseToResult(response, params);
+      }
+
+      return createErrorResult(params, 'Either roomId or messages must be provided');
     } catch (error) {
       return createErrorResult(params, error instanceof Error ? error.message : String(error));
     }
   }
+
+  private previewRequestFromRag(
+    params: AIGenerateParams,
+    ragContext: import('../../../../system/rag/shared/RAGTypes').RAGContext,
+    targetPersonaId: string,
+    personaDisplayName: string
+  ): TextGenerationRequest {
+    // Best-effort local mirror of the prompt that cognition/generate_response.rs assembles, kept in TS so
+    // --preview needs no IPC: system prompt, history with [HH:MM] prefixes and a marker for gaps over one
+    // hour, then an identity reminder carrying the current time. It will drift if the Rust assembly
+    // changes; wire a `cognition/preview-request` IPC if that becomes a problem.
+    const messages: TextGenerationRequest['messages'] = [
+      { role: 'system', content: ragContext.identity.systemPrompt }
+    ];
+    let lastTimestamp: number | undefined;
+    for (const msg of ragContext.conversationHistory) {
+      let timePrefix = '';
+      if (msg.timestamp) {
+        const date = new Date(msg.timestamp);
+        const hours = date.getHours().toString().padStart(2, '0');
+        const minutes = date.getMinutes().toString().padStart(2, '0');
+        timePrefix = `[${hours}:${minutes}] `;
+        if (lastTimestamp && (msg.timestamp - lastTimestamp > 3600000)) {
+          const gapHours = Math.floor((msg.timestamp - lastTimestamp) / 3600000);
+          messages.push({
+            role: 'system',
+            content: `⏱️ ${gapHours} hour${gapHours > 1 ? 's' : ''} passed - conversation resumed`
+          });
+        }
+        lastTimestamp = msg.timestamp;
+      }
+      messages.push({
+        role: msg.role,
+        content: msg.name ? `${timePrefix}${msg.name}: ${msg.content}` : `${timePrefix}${msg.content}`
+      });
+    }
+    const now = new Date();
+    const currentTime = `${now.toLocaleDateString('en-US', { month: '2-digit', day: '2-digit', year: 'numeric' })} ${now.toLocaleTimeString('en-US', { hour: '2-digit', minute: '2-digit', hour12: false })}`;
+    messages.push({
+      role: 'system',
+      content: `IDENTITY REMINDER: You are ${ragContext.identity?.name || personaDisplayName}. Respond naturally with JUST your message - NO name prefix.\n\nCURRENT TIME: ${currentTime}\n\nIMPORTANT: Pay attention to timestamps [HH:MM]. If messages are from hours ago but current question is recent, topic likely changed. Focus on MOST RECENT message.`
+    });
+    return {
+      messages,
+      model: params.model,
+      temperature: params.temperature ?? 0.7,
+      maxTokens: params.maxTokens ?? 150,
+      provider: params.provider || 'local',
+      personaContext: {
+        uniqueId: targetPersonaId,
+        displayName: ragContext.identity?.name || personaDisplayName,
+        logDir: ''
+      }
+    };
+  }
+
+  private synthesizeTriggerMessage(
+    history: import('../../../../system/rag/shared/RAGTypes').RAGContext['conversationHistory'],
+    roomId: string
+  ): ChatMessageEntity {
+    // The newest history entry stands in as the trigger (empty text / current time when it is missing).
+    // Rust is said to use it only for the admission lease key (room+persona+messageId); prompt content comes
+    // from ragContext.conversationHistory. NOTE(review): only roomId/content/timestamp are set — confirm the id.
+    const last = history[history.length - 1];
+    const msg = new ChatMessageEntity();
+    msg.roomId = roomId as ChatMessageEntity['roomId'];
+    msg.content = { text: last?.content ?? '', media: [] };
+    msg.timestamp = new Date(last?.timestamp ?? Date.now());
+    return msg;
+  }
 }
diff --git a/src/eslint-baseline.txt b/src/eslint-baseline.txt
@@ -1 +1 @@
-5432
+5431