Skip to content

Commit a4d970e

Browse files
committed
better compaction support in our other chat variants
1 parent dfdab6b commit a4d970e

File tree

2 files changed

+185
-54
lines changed
  • packages/trigger-sdk/src/v3
  • references/ai-chat/src/trigger

2 files changed

+185
-54
lines changed

packages/trigger-sdk/src/v3/ai.ts

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2908,6 +2908,11 @@ async function pipeChatAndCapture(
29082908
class ChatMessageAccumulator {
29092909
modelMessages: ModelMessage[] = [];
29102910
uiMessages: UIMessage[] = [];
2911+
private _compaction?: ChatTaskCompactionOptions;
2912+
2913+
constructor(options?: { compaction?: ChatTaskCompactionOptions }) {
2914+
this._compaction = options?.compaction;
2915+
}
29112916

29122917
/**
29132918
* Add incoming messages from the transport payload.
@@ -2958,6 +2963,84 @@ class ChatMessageAccumulator {
29582963
// Conversion failed — skip model message accumulation for this response
29592964
}
29602965
}
2966+
2967+
/**
2968+
* Returns a `prepareStep` function for inner-loop compaction.
2969+
* Only available when `compaction` was provided to the constructor.
2970+
* Pass the result to `streamText({ prepareStep: conversation.prepareStep() })`.
2971+
*/
2972+
prepareStep(): ((args: { messages: ModelMessage[]; steps: CompactionStep[] }) => Promise<{ messages: ModelMessage[] } | undefined>) | undefined {
2973+
if (!this._compaction) return undefined;
2974+
const comp = this._compaction;
2975+
return async ({ messages, steps }) => {
2976+
const result = await chatCompact(messages, steps, {
2977+
shouldCompact: comp.shouldCompact,
2978+
summarize: (msgs) => comp.summarize({ messages: msgs, source: "inner" }),
2979+
});
2980+
return result.type === "skipped" ? undefined : result;
2981+
};
2982+
}
2983+
2984+
/**
2985+
* Run outer-loop compaction if needed. Call after adding the response
2986+
* and capturing usage. Applies `compactModelMessages` and `compactUIMessages`
2987+
* callbacks if configured.
2988+
*
2989+
* @returns `true` if compaction was performed, `false` otherwise.
2990+
*/
2991+
async compactIfNeeded(usage: LanguageModelUsage | undefined, context?: {
2992+
chatId?: string;
2993+
turn?: number;
2994+
clientData?: unknown;
2995+
totalUsage?: LanguageModelUsage;
2996+
}): Promise<boolean> {
2997+
if (!this._compaction || !usage) return false;
2998+
2999+
const shouldTrigger = await this._compaction.shouldCompact({
3000+
messages: this.modelMessages,
3001+
totalTokens: usage.totalTokens,
3002+
inputTokens: usage.inputTokens,
3003+
outputTokens: usage.outputTokens,
3004+
usage,
3005+
totalUsage: context?.totalUsage,
3006+
chatId: context?.chatId,
3007+
turn: context?.turn,
3008+
clientData: context?.clientData,
3009+
source: "outer",
3010+
});
3011+
3012+
if (!shouldTrigger) return false;
3013+
3014+
const summary = await this._compaction.summarize({
3015+
messages: this.modelMessages,
3016+
usage,
3017+
totalUsage: context?.totalUsage,
3018+
chatId: context?.chatId,
3019+
turn: context?.turn,
3020+
clientData: context?.clientData,
3021+
source: "outer",
3022+
});
3023+
3024+
const compactEvent: CompactMessagesEvent = {
3025+
summary,
3026+
uiMessages: this.uiMessages,
3027+
modelMessages: this.modelMessages,
3028+
chatId: context?.chatId ?? "",
3029+
turn: context?.turn ?? 0,
3030+
clientData: context?.clientData,
3031+
source: "outer",
3032+
};
3033+
3034+
this.modelMessages = this._compaction.compactModelMessages
3035+
? await this._compaction.compactModelMessages(compactEvent)
3036+
: [{ role: "assistant" as const, content: [{ type: "text" as const, text: `[Conversation summary]\n\n${summary}` }] }];
3037+
3038+
if (this._compaction.compactUIMessages) {
3039+
this.uiMessages = await this._compaction.compactUIMessages(compactEvent);
3040+
}
3041+
3042+
return true;
3043+
}
29613044
}
29623045

29633046
// ---------------------------------------------------------------------------
@@ -2973,6 +3056,8 @@ export type ChatSessionOptions = {
29733056
timeout?: string;
29743057
/** Max turns before ending. @default 100 */
29753058
maxTurns?: number;
3059+
/** Automatic context compaction — same options as `chat.task({ compaction })`. */
3060+
compaction?: ChatTaskCompactionOptions;
29763061
};
29773062

29783063
export type ChatTurn = {
@@ -3065,6 +3150,7 @@ function createChatSession(
30653150
idleTimeoutInSeconds = 30,
30663151
timeout = "1h",
30673152
maxTurns = 100,
3153+
compaction: sessionCompaction,
30683154
} = options;
30693155

30703156
return {
@@ -3168,14 +3254,62 @@ function createChatSession(
31683254
}
31693255

31703256
// Capture token usage from the streamText result
3257+
let turnUsage: LanguageModelUsage | undefined;
31713258
if (typeof (source as any).totalUsage?.then === "function") {
31723259
try {
31733260
const usage: LanguageModelUsage = await (source as any).totalUsage;
3261+
turnUsage = usage;
31743262
previousTurnUsage = usage;
31753263
cumulativeUsage = addUsage(cumulativeUsage, usage);
31763264
} catch { /* non-fatal */ }
31773265
}
31783266

3267+
// Outer-loop compaction (same logic as chat.task)
3268+
if (sessionCompaction && turnUsage && !turnObj.stopped) {
3269+
const shouldTrigger = await sessionCompaction.shouldCompact({
3270+
messages: accumulator.modelMessages,
3271+
totalTokens: turnUsage.totalTokens,
3272+
inputTokens: turnUsage.inputTokens,
3273+
outputTokens: turnUsage.outputTokens,
3274+
usage: turnUsage,
3275+
totalUsage: cumulativeUsage,
3276+
chatId: currentPayload.chatId,
3277+
turn,
3278+
clientData: currentPayload.metadata,
3279+
source: "outer",
3280+
});
3281+
3282+
if (shouldTrigger) {
3283+
const summary = await sessionCompaction.summarize({
3284+
messages: accumulator.modelMessages,
3285+
usage: turnUsage,
3286+
totalUsage: cumulativeUsage,
3287+
chatId: currentPayload.chatId,
3288+
turn,
3289+
clientData: currentPayload.metadata,
3290+
source: "outer",
3291+
});
3292+
3293+
const compactEvent: CompactMessagesEvent = {
3294+
summary,
3295+
uiMessages: accumulator.uiMessages,
3296+
modelMessages: accumulator.modelMessages,
3297+
chatId: currentPayload.chatId,
3298+
turn,
3299+
clientData: currentPayload.metadata,
3300+
source: "outer",
3301+
};
3302+
3303+
accumulator.modelMessages = sessionCompaction.compactModelMessages
3304+
? await sessionCompaction.compactModelMessages(compactEvent)
3305+
: [{ role: "assistant" as const, content: [{ type: "text" as const, text: `[Conversation summary]\n\n${summary}` }] }];
3306+
3307+
if (sessionCompaction.compactUIMessages) {
3308+
accumulator.uiMessages = await sessionCompaction.compactUIMessages(compactEvent);
3309+
}
3310+
}
3311+
}
3312+
31793313
await chatWriteTurnComplete();
31803314
return response;
31813315
},

references/ai-chat/src/trigger/chat.ts

Lines changed: 51 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import { chat, type ChatTaskWirePayload } from "@trigger.dev/sdk/ai";
22
import { logger, task, prompts } from "@trigger.dev/sdk";
33
import { streamText, generateText, tool, dynamicTool, stepCountIs, generateId, createProviderRegistry } from "ai";
4-
import type { LanguageModel, Tool as AITool, UIMessage } from "ai";
4+
import type { LanguageModel, LanguageModelUsage, Tool as AITool, UIMessage } from "ai";
55
import { openai } from "@ai-sdk/openai";
66
import { anthropic } from "@ai-sdk/anthropic";
77
import { z } from "zod";
@@ -565,7 +565,27 @@ export const aiChatRaw = task({
565565
}
566566

567567
const stop = chat.createStopSignal();
568-
const conversation = new chat.MessageAccumulator();
568+
const conversation = new chat.MessageAccumulator({
569+
compaction: {
570+
shouldCompact: ({ totalTokens }) => (totalTokens ?? 0) > COMPACT_AFTER_TOKENS,
571+
summarize: async ({ messages: msgs }) => {
572+
const resolved = await compactionPrompt.resolve({});
573+
return generateText({
574+
model: registry.languageModel(resolved.model ?? "openai:gpt-4o-mini"),
575+
...resolved.toAISDKTelemetry(),
576+
messages: [...msgs, { role: "user" as const, content: resolved.text }],
577+
}).then((r) => r.text);
578+
},
579+
// Flatten to summary only in the raw task variant
580+
compactUIMessages: ({ summary }) => [
581+
{
582+
id: generateId(),
583+
role: "assistant" as const,
584+
parts: [{ type: "text" as const, text: `[Summary]\n\n${summary}` }],
585+
},
586+
],
587+
},
588+
});
569589

570590
for (let turn = 0; turn < 100; turn++) {
571591
stop.reset();
@@ -622,33 +642,7 @@ export const aiChatRaw = task({
622642
...(useReasoning ? { thinking: { type: "enabled", budgetTokens: 10000 } } : {}),
623643
},
624644
},
625-
// Low-level compaction using chat.compact() — gives full control
626-
// while chat.compact handles the decision tree + stream chunks
627-
prepareStep: async ({ messages: stepMessages, steps }) => {
628-
// Custom logic before/around compaction
629-
const lastStep = steps.at(-1);
630-
if (lastStep?.usage.totalTokens) {
631-
logger.info("Raw task: step usage", { totalTokens: lastStep.usage.totalTokens, turn });
632-
}
633-
634-
const result = await chat.compact(stepMessages, steps, {
635-
threshold: COMPACT_AFTER_TOKENS,
636-
summarize: async (msgs) => {
637-
const resolved = await compactionPrompt.resolve({});
638-
return generateText({
639-
model: registry.languageModel(resolved.model ?? "openai:gpt-4o-mini"),
640-
...resolved.toAISDKTelemetry(),
641-
messages: [...msgs, { role: "user" as const, content: resolved.text }],
642-
}).then((r) => r.text);
643-
},
644-
});
645-
646-
if (result.type === "compacted") {
647-
logger.info("Raw task: compacted", { summary: result.summary.slice(0, 100) });
648-
}
649-
650-
return result.type === "skipped" ? undefined : result;
651-
},
645+
prepareStep: conversation.prepareStep(),
652646
});
653647

654648
let response: UIMessage | undefined;
@@ -673,6 +667,14 @@ export const aiChatRaw = task({
673667

674668
if (runSignal.aborted) break;
675669

670+
// Outer-loop compaction — runs if token threshold exceeded
671+
let turnUsage: LanguageModelUsage | undefined;
672+
try { turnUsage = await result.totalUsage; } catch { /* non-fatal */ }
673+
await conversation.compactIfNeeded(turnUsage, {
674+
chatId: currentPayload.chatId,
675+
turn,
676+
});
677+
676678
// Persist messages
677679
await prisma.chat.update({
678680
where: { id: currentPayload.chatId },
@@ -722,6 +724,26 @@ export const aiChatSession = task({
722724
signal,
723725
idleTimeoutInSeconds: payload.idleTimeoutInSeconds ?? 60,
724726
timeout: "1h",
727+
compaction: {
728+
shouldCompact: ({ totalTokens }) => (totalTokens ?? 0) > COMPACT_AFTER_TOKENS,
729+
summarize: async ({ messages: msgs }) => {
730+
const resolved = await compactionPrompt.resolve({});
731+
return generateText({
732+
model: registry.languageModel(resolved.model ?? "openai:gpt-4o-mini"),
733+
...resolved.toAISDKTelemetry(),
734+
messages: [...msgs, { role: "user" as const, content: resolved.text }],
735+
}).then((r) => r.text);
736+
},
737+
// Keep summary + last 4 messages in the session variant
738+
compactUIMessages: ({ uiMessages, summary }) => [
739+
{
740+
id: generateId(),
741+
role: "assistant" as const,
742+
parts: [{ type: "text" as const, text: `[Conversation summary]\n\n${summary}` }],
743+
},
744+
...uiMessages.slice(-4),
745+
],
746+
},
725747
});
726748

727749
for await (const turn of session) {
@@ -754,31 +776,6 @@ export const aiChatSession = task({
754776
...(useReasoning ? { thinking: { type: "enabled", budgetTokens: 10000 } } : {}),
755777
},
756778
},
757-
// Low-level compaction — same pattern as raw task
758-
prepareStep: async ({ messages: stepMessages, steps }) => {
759-
const lastStep = steps.at(-1);
760-
if (lastStep?.usage.totalTokens) {
761-
logger.info("Session: step usage", { totalTokens: lastStep.usage.totalTokens, turn: turn.number });
762-
}
763-
764-
const result = await chat.compact(stepMessages, steps, {
765-
threshold: COMPACT_AFTER_TOKENS,
766-
summarize: async (msgs) => {
767-
const resolved = await compactionPrompt.resolve({});
768-
return generateText({
769-
model: registry.languageModel(resolved.model ?? "openai:gpt-4o-mini"),
770-
...resolved.toAISDKTelemetry(),
771-
messages: [...msgs, { role: "user" as const, content: resolved.text }],
772-
}).then((r) => r.text);
773-
},
774-
});
775-
776-
if (result.type === "compacted") {
777-
logger.info("Session: compacted", { summary: result.summary.slice(0, 100) });
778-
}
779-
780-
return result.type === "skipped" ? undefined : result;
781-
},
782779
});
783780

784781
await turn.complete(result);

0 commit comments

Comments (0)