Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
255 changes: 157 additions & 98 deletions src/commands/ai/generate/server/AIGenerateServerCommand.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,25 @@
/**
* AI Generate Command - Server Implementation
* ============================================
* AI Generate Command - Server Implementation (thin shim)
* =======================================================
*
* Server-side AI generation with RAG context building
* All database access and LLM calls happen here
* Rust owns response generation: prompt assembly (system prompt +
* history + time prefixes + hour-gap markers + identity reminder),
* provider selection, admission gating, timeout, and token-usage
* stamping all live in `cognition/generate_response.rs`. This shim:
*
* 1. Builds the RAG context server-side (still TS — the
* `ChatRAGBuilder` factory + entity reads have not been ported
* to Rust yet; tracked separately).
* 2. Adapts the RAG context onto `AIDecisionContext` and hands off
* to `AIDecisionService.generateResponse`, which is the proven
* IPC seam already used by PersonaUser's response path.
* 3. Translates the Rust result back to `AIGenerateResult`.
*
* Direct-message and preview modes remain TS-side because they are
* introspection/test paths that bypass admission and provider
* selection — Rust intentionally does not expose a "skip the gate"
* code path.
*/

import { AIGenerateCommand } from '../shared/AIGenerateCommand';
import type { JTAGContext } from '../../../../system/core/types/JTAGTypes';
import type { ICommandDaemon } from '../../../../daemons/command-daemon/shared/CommandBase';
Expand All @@ -14,13 +28,12 @@ import { paramsToRequest, responseToResult, createErrorResult, createAIGenerateR
import { AIProviderDaemon } from '../../../../daemons/ai-provider-daemon/shared/AIProviderDaemon';
import { RAGBuilderFactory } from '../../../../system/rag/shared/RAGBuilder';
import { getContextWindow, getInferenceSpeed } from '../../../../system/shared/ModelContextWindows';
import type { RAGContext } from '../../../../system/rag/shared/RAGTypes';
import { ChatRAGBuilder } from '../../../../system/rag/builders/ChatRAGBuilder';
import { ORM } from '../../../../daemons/data-daemon/server/ORM';
import { UserEntity } from '../../../../system/data/entities/UserEntity';
import { ChatMessageEntity } from '../../../../system/data/entities/ChatMessageEntity';
import type { TextGenerationRequest } from '../../../../daemons/ai-provider-daemon/shared/AIProviderTypesV2';
import { SystemPaths } from '../../../../system/core/config/SystemPaths';
import { LOCAL_MODELS } from '../../../../system/shared/Constants';
import { AIDecisionService, type AIDecisionContext } from '../../../../system/ai/server/AIDecisionService';

export class AIGenerateServerCommand extends AIGenerateCommand {
constructor(context: JTAGContext, subpath: string, commander: ICommandDaemon) {
Expand All @@ -34,16 +47,11 @@ export class AIGenerateServerCommand extends AIGenerateCommand {

async execute(params: AIGenerateParams): Promise<AIGenerateResult> {
try {
let request: TextGenerationRequest;
let ragContext: RAGContext | undefined = undefined;

// Mode selection: RAG context building OR direct messages
// RAG MODE: build context, delegate to Rust generate-response
if (params.roomId) {
// RAG MODE: Build context from chat room (SAME code path as PersonaUser)

// Find persona if not specified
let targetPersonaId = params.personaId;
let personaDisplayName = 'ai-generate-command'; // Fallback name for tracking
let personaDisplayName = 'ai-generate-command';
if (!targetPersonaId) {
const usersResult = await ORM.query<UserEntity>({
collection: UserEntity.collection,
Expand All @@ -60,9 +68,8 @@ export class AIGenerateServerCommand extends AIGenerateCommand {
personaDisplayName = personaRecord.data.displayName;
}

// Build RAG context (SAME code as PersonaUser.respondToMessage line 207-215)
const ragBuilder = RAGBuilderFactory.getBuilder('chat');
ragContext = await ragBuilder.buildContext(
const ragContext = await ragBuilder.buildContext(
params.roomId,
targetPersonaId,
{
Expand All @@ -78,100 +85,152 @@ export class AIGenerateServerCommand extends AIGenerateCommand {
}
);

// Convert to messages array with timestamps + gaps (SAME as PersonaUser.ts:376-415)
const messages: TextGenerationRequest['messages'] = [];
messages.push({
role: 'system',
content: ragContext.identity.systemPrompt
});

// Add conversation history with timestamp formatting + gap detection
let lastTimestamp: number | undefined;
for (const msg of ragContext.conversationHistory) {
let timePrefix = '';
if (msg.timestamp) {
const date = new Date(msg.timestamp);
const hours = date.getHours().toString().padStart(2, '0');
const minutes = date.getMinutes().toString().padStart(2, '0');
timePrefix = `[${hours}:${minutes}] `;

// Detect significant time gaps (> 1 hour)
if (lastTimestamp && (msg.timestamp - lastTimestamp > 3600000)) {
const gapHours = Math.floor((msg.timestamp - lastTimestamp) / 3600000);
messages.push({
role: 'system',
content: `⏱️ ${gapHours} hour${gapHours > 1 ? 's' : ''} passed - conversation resumed`
});
}
lastTimestamp = msg.timestamp;
}

messages.push({
role: msg.role,
content: msg.name ? `${timePrefix}${msg.name}: ${msg.content}` : `${timePrefix}${msg.content}`
// PREVIEW MODE: reconstruct the request Rust would build (best-effort
// mirror; the source of truth is `build_response_generation_request`
// in cognition/generate_response.rs). Returns without inference.
if (params.preview) {
const previewRequest = this.previewRequestFromRag(params, ragContext, targetPersonaId, personaDisplayName);
const formatted = this.formatRequestPreview(previewRequest, ragContext);
return createAIGenerateResultFromParams(params, {
success: true,
preview: true,
request: previewRequest,
formatted,
ragContext: ragContext as unknown as Record<string, unknown>
});
}

// Identity reminder with current time
const now = new Date();
const currentTime = `${now.toLocaleDateString('en-US', { month: '2-digit', day: '2-digit', year: 'numeric' })} ${now.toLocaleTimeString('en-US', { hour: '2-digit', minute: '2-digit', hour12: false })}`;
messages.push({
role: 'system',
content: `IDENTITY REMINDER: You are ${ragContext.identity.name}. Respond naturally with JUST your message - NO name prefix.\n\nCURRENT TIME: ${currentTime}\n\nIMPORTANT: Pay attention to timestamps [HH:MM]. If messages are from hours ago but current question is recent, topic likely changed. Focus on MOST RECENT message.`
});

// Build request with personaContext for proper logging and routing
request = {
messages,
model: params.model || LOCAL_MODELS.DEFAULT,
temperature: params.temperature ?? 0.7,
maxTokens: params.maxTokens ?? 150,
// Default to 'local' (DMR via Rust IPC), NEVER a cloud provider.
// Continuum's architectural point is local models; cloud providers
// are opt-in via explicit --provider, not silent fallback. Pre-fix
// the default was 'candle' which is misleading (Candle is a
// training framework, not inference) and Rust's routing for an
// unknown provider could pick a registered cloud adapter (Carl's
// #980 Bug 7: silent DeepSeek 401 with no key configured). 'local'
// explicitly routes to Rust→DMR; if DMR isn't running, Rust
// hard-fails with an actionable error instead of silently falling
// through to a cloud provider that requires a key the user never
// set. Joel: "deepseek can't be a fallback" / "whole point is
// local models, make them work."
provider: params.provider || 'local',
personaContext: {
uniqueId: targetPersonaId,
displayName: ragContext.identity?.name || personaDisplayName,
logDir: SystemPaths.personas.dir(targetPersonaId)
}
// Adapt onto AIDecisionContext for the Rust shim.
// triggerMessage is the latest history entry — Rust uses it for
// the admission lease/artifact key, not for prompt content.
const history = ragContext.conversationHistory;
const triggerMessage = this.synthesizeTriggerMessage(history, params.roomId);
const decisionContext: AIDecisionContext = {
personaId: targetPersonaId,
personaName: ragContext.identity?.name || personaDisplayName,
roomId: params.roomId,
triggerMessage,
ragContext,
systemPrompt: ragContext.identity.systemPrompt,
};

} else if (params.messages) {
// DIRECT MODE: Use provided messages
request = paramsToRequest(params);

} else {
return createErrorResult(params, 'Either roomId or messages must be provided');
}

// PREVIEW MODE: Return request without calling LLM
if (params.preview) {
const formatted = this.formatRequestPreview(request, ragContext);
const generation = await AIDecisionService.generateResponse(decisionContext, {
model: params.model,
temperature: params.temperature,
maxTokens: params.maxTokens,
});

return createAIGenerateResultFromParams(params, {
success: true,
preview: true,
request,
formatted,
ragContext: ragContext as unknown as Record<string, unknown>
text: generation.text,
model: generation.model,
provider: params.provider || 'local',
responseTimeMs: generation.responseTime,
requestId: undefined,
usage: generation.tokensUsed
? {
inputTokens: generation.tokensUsed.input,
outputTokens: generation.tokensUsed.output,
totalTokens: generation.tokensUsed.total,
}
: undefined,
});
}

// GENERATION MODE: Call AIProviderDaemon
const response = await AIProviderDaemon.generateText(request);
return responseToResult(response, params);
// DIRECT MODE: pass-through to AIProviderDaemon. No admission gate
// here — direct mode is a test/introspection path; production
// traffic comes through RAG mode above.
if (params.messages) {
const request: TextGenerationRequest = paramsToRequest(params);

if (params.preview) {
const formatted = this.formatRequestPreview(request, undefined);
return createAIGenerateResultFromParams(params, {
success: true,
preview: true,
request,
formatted,
ragContext: undefined
});
}

const response = await AIProviderDaemon.generateText(request);
return responseToResult(response, params);
}

return createErrorResult(params, 'Either roomId or messages must be provided');
} catch (error) {
return createErrorResult(params, error instanceof Error ? error.message : String(error));
}
}

/**
 * Builds a local, best-effort copy of the generation request for `--preview`.
 *
 * The message list is assembled in this order:
 *   1. one system message carrying `ragContext.identity.systemPrompt`;
 *   2. every entry of `ragContext.conversationHistory`, prefixed with
 *      `[HH:MM] ` when the entry has a timestamp and with `name: ` when it
 *      has a name; a system "hours passed" marker is inserted before an
 *      entry whose timestamp is more than one hour after the previous
 *      timestamped entry;
 *   3. one trailing system message with the identity reminder and the
 *      current time.
 *
 * No inference call is made here. The in-file notes describe this as a
 * mirror of the prompt assembly in `cognition/generate_response.rs`, so it
 * can drift if that assembly changes.
 *
 * @param params - Command params; `model`, `temperature`, `maxTokens` and
 *   `provider` are read (temperature defaults to 0.7, maxTokens to 150,
 *   provider to `'local'`).
 * @param ragContext - RAG context supplying identity and conversation history.
 * @param targetPersonaId - Stored as `personaContext.uniqueId`.
 * @param personaDisplayName - Fallback name when the RAG identity has no name.
 * @returns The reconstructed request; `personaContext.logDir` is left empty.
 */
private previewRequestFromRag(
  params: AIGenerateParams,
  ragContext: import('../../../../system/rag/shared/RAGTypes').RAGContext,
  targetPersonaId: string,
  personaDisplayName: string
): TextGenerationRequest {
  const HOUR_MS = 3600000;
  const twoDigits = (value: number): string => value.toString().padStart(2, '0');

  // The system prompt always leads the message list.
  const preview: TextGenerationRequest['messages'] = [];
  preview.push({ role: 'system', content: ragContext.identity.systemPrompt });

  // Timestamp of the most recent history entry that carried one.
  let previousStamp: number | undefined;
  for (const entry of ragContext.conversationHistory) {
    let stampLabel = '';
    if (entry.timestamp) {
      const at = new Date(entry.timestamp);
      stampLabel = `[${twoDigits(at.getHours())}:${twoDigits(at.getMinutes())}] `;

      // Without an earlier timestamp there is no gap to report.
      const elapsed = previousStamp ? entry.timestamp - previousStamp : 0;
      if (elapsed > HOUR_MS) {
        const gapHours = Math.floor(elapsed / HOUR_MS);
        const plural = gapHours > 1 ? 's' : '';
        preview.push({
          role: 'system',
          content: `⏱️ ${gapHours} hour${plural} passed - conversation resumed`
        });
      }
      previousStamp = entry.timestamp;
    }

    const speaker = entry.name ? `${entry.name}: ` : '';
    preview.push({
      role: entry.role,
      content: `${stampLabel}${speaker}${entry.content}`
    });
  }

  // Closing reminder: who the persona is and what time it is now.
  const now = new Date();
  const datePart = now.toLocaleDateString('en-US', { month: '2-digit', day: '2-digit', year: 'numeric' });
  const timePart = now.toLocaleTimeString('en-US', { hour: '2-digit', minute: '2-digit', hour12: false });
  const currentTime = `${datePart} ${timePart}`;
  const resolvedName = ragContext.identity?.name || personaDisplayName;
  preview.push({
    role: 'system',
    content: `IDENTITY REMINDER: You are ${resolvedName}. Respond naturally with JUST your message - NO name prefix.\n\nCURRENT TIME: ${currentTime}\n\nIMPORTANT: Pay attention to timestamps [HH:MM]. If messages are from hours ago but current question is recent, topic likely changed. Focus on MOST RECENT message.`
  });

  return {
    messages: preview,
    model: params.model,
    temperature: params.temperature ?? 0.7,
    maxTokens: params.maxTokens ?? 150,
    provider: params.provider || 'local',
    personaContext: {
      uniqueId: targetPersonaId,
      displayName: resolvedName,
      logDir: ''
    }
  };
}

/**
 * Creates a stand-in `ChatMessageEntity` from the newest history entry so
 * it can be passed as the `triggerMessage` of an `AIDecisionContext`.
 *
 * Only `roomId`, `content` and `timestamp` are populated. With an empty
 * history the text is `''` and the timestamp is the current time.
 *
 * Per the in-file notes, the Rust side uses the trigger for the admission
 * lease key (room + persona + messageId), while prompt content comes from
 * `ragContext.conversationHistory`.
 * NOTE(review): no message id is assigned here — confirm that
 * `ChatMessageEntity` supplies one if the lease key depends on it.
 *
 * @param history - Conversation history; its last element is the trigger.
 * @param roomId - Room the synthesized message is attributed to.
 * @returns A new entity carrying the latest entry's text and timestamp.
 */
private synthesizeTriggerMessage(
  history: import('../../../../system/rag/shared/RAGTypes').RAGContext['conversationHistory'],
  roomId: string
): ChatMessageEntity {
  // The newest entry, or undefined when the history is empty.
  const latest = history.length > 0 ? history[history.length - 1] : undefined;

  const trigger = new ChatMessageEntity();
  trigger.roomId = roomId as ChatMessageEntity['roomId'];

  const text = latest?.content ?? '';
  trigger.content = { text, media: [] };

  // Fall back to "now" when the entry has no timestamp.
  const when = latest?.timestamp ?? Date.now();
  trigger.timestamp = new Date(when);

  return trigger;
}
}
2 changes: 1 addition & 1 deletion src/eslint-baseline.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
5432
5431
Loading