From a861fcc8939977ff32aec9a937b9282a12056a0a Mon Sep 17 00:00:00 2001 From: Test Date: Mon, 25 May 2026 14:38:19 -0500 Subject: [PATCH] =?UTF-8?q?oxidizer:=20AIGenerateServerCommand=20=E2=86=92?= =?UTF-8?q?=20cognition/generate-response=20shim?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RAG-mode generation now delegates to Rust via AIDecisionService — the same IPC seam PersonaUser's response path already uses. Rust owns prompt assembly (system prompt + history + time prefixes + hour-gap markers + identity reminder), provider routing, admission gating, timeout, and token-usage stamping (build_response_messages + build_response_generation_request in cognition/generate_response.rs). Direct-message + preview modes stay TS-side: - Direct mode is an introspection/test path that bypasses admission; Rust intentionally does not expose a "skip the gate" code path. - Preview mode reconstructs the request Rust would build as a local mirror. Source of truth is the Rust path; if assembly drifts, a `cognition/preview-request` IPC is the fix. Mirrors the pattern from #1421 (should-respond) and #1426 (validate-response-decision). The 100 lines of TS message-building that duplicated build_response_messages now live in Rust for the generation path; only the preview mirror remains in TS. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../server/AIGenerateServerCommand.ts | 255 +++++++++++------- src/eslint-baseline.txt | 2 +- 2 files changed, 158 insertions(+), 99 deletions(-) diff --git a/src/commands/ai/generate/server/AIGenerateServerCommand.ts b/src/commands/ai/generate/server/AIGenerateServerCommand.ts index 39946c20c..13a2e4805 100644 --- a/src/commands/ai/generate/server/AIGenerateServerCommand.ts +++ b/src/commands/ai/generate/server/AIGenerateServerCommand.ts @@ -1,11 +1,25 @@ /** - * AI Generate Command - Server Implementation - * ============================================ + * AI Generate Command - Server Implementation (thin shim) + * ======================================================= * - * Server-side AI generation with RAG context building - * All database access and LLM calls happen here + * Rust owns response generation: prompt assembly (system prompt + + * history + time prefixes + hour-gap markers + identity reminder), + * provider selection, admission gating, timeout, and token-usage + * stamping all live in `cognition/generate_response.rs`. This shim: + * + * 1. Builds the RAG context server-side (still TS — the + * `ChatRAGBuilder` factory + entity reads have not been ported + * to Rust yet; tracked separately). + * 2. Adapts the RAG context onto `AIDecisionContext` and hands off + * to `AIDecisionService.generateResponse`, which is the proven + * IPC seam already used by PersonaUser's response path. + * 3. Translates the Rust result back to `AIGenerateResult`. + * + * Direct-message and preview modes remain TS-side because they are + * introspection/test paths that bypass admission and provider + * selection — Rust intentionally does not expose a "skip the gate" + * code path. 
*/ - import { AIGenerateCommand } from '../shared/AIGenerateCommand'; import type { JTAGContext } from '../../../../system/core/types/JTAGTypes'; import type { ICommandDaemon } from '../../../../daemons/command-daemon/shared/CommandBase'; @@ -14,13 +28,12 @@ import { paramsToRequest, responseToResult, createErrorResult, createAIGenerateR import { AIProviderDaemon } from '../../../../daemons/ai-provider-daemon/shared/AIProviderDaemon'; import { RAGBuilderFactory } from '../../../../system/rag/shared/RAGBuilder'; import { getContextWindow, getInferenceSpeed } from '../../../../system/shared/ModelContextWindows'; -import type { RAGContext } from '../../../../system/rag/shared/RAGTypes'; import { ChatRAGBuilder } from '../../../../system/rag/builders/ChatRAGBuilder'; import { ORM } from '../../../../daemons/data-daemon/server/ORM'; import { UserEntity } from '../../../../system/data/entities/UserEntity'; +import { ChatMessageEntity } from '../../../../system/data/entities/ChatMessageEntity'; import type { TextGenerationRequest } from '../../../../daemons/ai-provider-daemon/shared/AIProviderTypesV2'; -import { SystemPaths } from '../../../../system/core/config/SystemPaths'; -import { LOCAL_MODELS } from '../../../../system/shared/Constants'; +import { AIDecisionService, type AIDecisionContext } from '../../../../system/ai/server/AIDecisionService'; export class AIGenerateServerCommand extends AIGenerateCommand { constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) { @@ -34,16 +47,11 @@ export class AIGenerateServerCommand extends AIGenerateCommand { async execute(params: AIGenerateParams): Promise { try { - let request: TextGenerationRequest; - let ragContext: RAGContext | undefined = undefined; - - // Mode selection: RAG context building OR direct messages + // RAG MODE: build context, delegate to Rust generate-response if (params.roomId) { - // RAG MODE: Build context from chat room (SAME code path as PersonaUser) - // Find persona if not 
specified let targetPersonaId = params.personaId; - let personaDisplayName = 'ai-generate-command'; // Fallback name for tracking + let personaDisplayName = 'ai-generate-command'; if (!targetPersonaId) { const usersResult = await ORM.query({ collection: UserEntity.collection, @@ -60,9 +68,8 @@ export class AIGenerateServerCommand extends AIGenerateCommand { personaDisplayName = personaRecord.data.displayName; } - // Build RAG context (SAME code as PersonaUser.respondToMessage line 207-215) const ragBuilder = RAGBuilderFactory.getBuilder('chat'); - ragContext = await ragBuilder.buildContext( + const ragContext = await ragBuilder.buildContext( params.roomId, targetPersonaId, { @@ -78,100 +85,152 @@ export class AIGenerateServerCommand extends AIGenerateCommand { } ); - // Convert to messages array with timestamps + gaps (SAME as PersonaUser.ts:376-415) - const messages: TextGenerationRequest['messages'] = []; - messages.push({ - role: 'system', - content: ragContext.identity.systemPrompt - }); - - // Add conversation history with timestamp formatting + gap detection - let lastTimestamp: number | undefined; - for (const msg of ragContext.conversationHistory) { - let timePrefix = ''; - if (msg.timestamp) { - const date = new Date(msg.timestamp); - const hours = date.getHours().toString().padStart(2, '0'); - const minutes = date.getMinutes().toString().padStart(2, '0'); - timePrefix = `[${hours}:${minutes}] `; - - // Detect significant time gaps (> 1 hour) - if (lastTimestamp && (msg.timestamp - lastTimestamp > 3600000)) { - const gapHours = Math.floor((msg.timestamp - lastTimestamp) / 3600000); - messages.push({ - role: 'system', - content: `⏱️ ${gapHours} hour${gapHours > 1 ? 's' : ''} passed - conversation resumed` - }); - } - lastTimestamp = msg.timestamp; - } - - messages.push({ - role: msg.role, - content: msg.name ? 
`${timePrefix}${msg.name}: ${msg.content}` : `${timePrefix}${msg.content}` + // PREVIEW MODE: reconstruct the request Rust would build (best-effort + // mirror; the source of truth is `build_response_generation_request` + // in cognition/generate_response.rs). Returns without inference. + if (params.preview) { + const previewRequest = this.previewRequestFromRag(params, ragContext, targetPersonaId, personaDisplayName); + const formatted = this.formatRequestPreview(previewRequest, ragContext); + return createAIGenerateResultFromParams(params, { + success: true, + preview: true, + request: previewRequest, + formatted, + ragContext: ragContext as unknown as Record }); } - // Identity reminder with current time - const now = new Date(); - const currentTime = `${now.toLocaleDateString('en-US', { month: '2-digit', day: '2-digit', year: 'numeric' })} ${now.toLocaleTimeString('en-US', { hour: '2-digit', minute: '2-digit', hour12: false })}`; - messages.push({ - role: 'system', - content: `IDENTITY REMINDER: You are ${ragContext.identity.name}. Respond naturally with JUST your message - NO name prefix.\n\nCURRENT TIME: ${currentTime}\n\nIMPORTANT: Pay attention to timestamps [HH:MM]. If messages are from hours ago but current question is recent, topic likely changed. Focus on MOST RECENT message.` - }); - - // Build request with personaContext for proper logging and routing - request = { - messages, - model: params.model || LOCAL_MODELS.DEFAULT, - temperature: params.temperature ?? 0.7, - maxTokens: params.maxTokens ?? 150, - // Default to 'local' (DMR via Rust IPC), NEVER a cloud provider. - // Continuum's architectural point is local models; cloud providers - // are opt-in via explicit --provider, not silent fallback. 
Pre-fix - // the default was 'candle' which is misleading (Candle is a - // training framework, not inference) and Rust's routing for an - // unknown provider could pick a registered cloud adapter (Carl's - // #980 Bug 7: silent DeepSeek 401 with no key configured). 'local' - // explicitly routes to Rust→DMR; if DMR isn't running, Rust - // hard-fails with an actionable error instead of silently falling - // through to a cloud provider that requires a key the user never - // set. Joel: "deepseek can't be a fallback" / "whole point is - // local models, make them work." - provider: params.provider || 'local', - personaContext: { - uniqueId: targetPersonaId, - displayName: ragContext.identity?.name || personaDisplayName, - logDir: SystemPaths.personas.dir(targetPersonaId) - } + // Adapt onto AIDecisionContext for the Rust shim. + // triggerMessage is the latest history entry — Rust uses it for + // the admission lease/artifact key, not for prompt content. + const history = ragContext.conversationHistory; + const triggerMessage = this.synthesizeTriggerMessage(history, params.roomId); + const decisionContext: AIDecisionContext = { + personaId: targetPersonaId, + personaName: ragContext.identity?.name || personaDisplayName, + roomId: params.roomId, + triggerMessage, + ragContext, + systemPrompt: ragContext.identity.systemPrompt, }; - } else if (params.messages) { - // DIRECT MODE: Use provided messages - request = paramsToRequest(params); - - } else { - return createErrorResult(params, 'Either roomId or messages must be provided'); - } - - // PREVIEW MODE: Return request without calling LLM - if (params.preview) { - const formatted = this.formatRequestPreview(request, ragContext); + const generation = await AIDecisionService.generateResponse(decisionContext, { + model: params.model, + temperature: params.temperature, + maxTokens: params.maxTokens, + }); return createAIGenerateResultFromParams(params, { success: true, - preview: true, - request, - formatted, - 
ragContext: ragContext as unknown as Record + text: generation.text, + model: generation.model, + provider: params.provider || 'local', + responseTimeMs: generation.responseTime, + requestId: undefined, + usage: generation.tokensUsed + ? { + inputTokens: generation.tokensUsed.input, + outputTokens: generation.tokensUsed.output, + totalTokens: generation.tokensUsed.total, + } + : undefined, }); } - // GENERATION MODE: Call AIProviderDaemon - const response = await AIProviderDaemon.generateText(request); - return responseToResult(response, params); + // DIRECT MODE: pass-through to AIProviderDaemon. No admission gate + // here — direct mode is a test/introspection path; production + // traffic comes through RAG mode above. + if (params.messages) { + const request: TextGenerationRequest = paramsToRequest(params); + + if (params.preview) { + const formatted = this.formatRequestPreview(request, undefined); + return createAIGenerateResultFromParams(params, { + success: true, + preview: true, + request, + formatted, + ragContext: undefined + }); + } + + const response = await AIProviderDaemon.generateText(request); + return responseToResult(response, params); + } + + return createErrorResult(params, 'Either roomId or messages must be provided'); } catch (error) { return createErrorResult(params, error instanceof Error ? error.message : String(error)); } } + + private previewRequestFromRag( + params: AIGenerateParams, + ragContext: import('../../../../system/rag/shared/RAGTypes').RAGContext, + targetPersonaId: string, + personaDisplayName: string + ): TextGenerationRequest { + // Mirror of what cognition/generate_response.rs assembles. Kept + // local so --preview stays useful without IPC. If the Rust prompt + // assembly changes, this drifts — wire a `cognition/preview-request` + // IPC if drift becomes a problem. 
+ const messages: TextGenerationRequest['messages'] = [ + { role: 'system', content: ragContext.identity.systemPrompt } + ]; + let lastTimestamp: number | undefined; + for (const msg of ragContext.conversationHistory) { + let timePrefix = ''; + if (msg.timestamp) { + const date = new Date(msg.timestamp); + const hours = date.getHours().toString().padStart(2, '0'); + const minutes = date.getMinutes().toString().padStart(2, '0'); + timePrefix = `[${hours}:${minutes}] `; + if (lastTimestamp && (msg.timestamp - lastTimestamp > 3600000)) { + const gapHours = Math.floor((msg.timestamp - lastTimestamp) / 3600000); + messages.push({ + role: 'system', + content: `⏱️ ${gapHours} hour${gapHours > 1 ? 's' : ''} passed - conversation resumed` + }); + } + lastTimestamp = msg.timestamp; + } + messages.push({ + role: msg.role, + content: msg.name ? `${timePrefix}${msg.name}: ${msg.content}` : `${timePrefix}${msg.content}` + }); + } + const now = new Date(); + const currentTime = `${now.toLocaleDateString('en-US', { month: '2-digit', day: '2-digit', year: 'numeric' })} ${now.toLocaleTimeString('en-US', { hour: '2-digit', minute: '2-digit', hour12: false })}`; + messages.push({ + role: 'system', + content: `IDENTITY REMINDER: You are ${ragContext.identity?.name || personaDisplayName}. Respond naturally with JUST your message - NO name prefix.\n\nCURRENT TIME: ${currentTime}\n\nIMPORTANT: Pay attention to timestamps [HH:MM]. If messages are from hours ago but current question is recent, topic likely changed. Focus on MOST RECENT message.` + }); + return { + messages, + model: params.model, + temperature: params.temperature ?? 0.7, + maxTokens: params.maxTokens ?? 
150, + provider: params.provider || 'local', + personaContext: { + uniqueId: targetPersonaId, + displayName: ragContext.identity?.name || personaDisplayName, + logDir: '' + } + }; + } + + private synthesizeTriggerMessage( + history: import('../../../../system/rag/shared/RAGTypes').RAGContext['conversationHistory'], + roomId: string + ): ChatMessageEntity { + // Latest message is the trigger. Rust uses this for the admission + // lease key (room+persona+messageId) — the prompt content comes + // from ragContext.conversationHistory regardless. + const last = history[history.length - 1]; + const msg = new ChatMessageEntity(); + msg.roomId = roomId as ChatMessageEntity['roomId']; + msg.content = { text: last?.content ?? '', media: [] }; + msg.timestamp = new Date(last?.timestamp ?? Date.now()); + return msg; + } } diff --git a/src/eslint-baseline.txt b/src/eslint-baseline.txt index 38627a6f0..7e30bed39 100644 --- a/src/eslint-baseline.txt +++ b/src/eslint-baseline.txt @@ -1 +1 @@ -5432 +5431