|
| 1 | +--- |
| 2 | +title: "Compaction" |
| 3 | +sidebarTitle: "Compaction" |
| 4 | +description: "Automatic context compaction to keep long conversations within token limits." |
| 5 | +--- |
| 6 | + |
| 7 | +## Overview |
| 8 | + |
| 9 | +Long conversations accumulate tokens across turns. Eventually the context window fills up, causing errors or degraded responses. Compaction solves this by automatically summarizing the conversation when token usage exceeds a threshold, then using that summary as the context for future turns. |
| 10 | + |
| 11 | +The `compaction` option on `chat.task()` handles this at both points where compaction can run: |
| 12 | + |
| 13 | +- **Between tool-call steps** (inner loop) — via the AI SDK's `prepareStep`, compaction runs between tool calls within a single turn |
| 14 | +- **Between turns** (outer loop) — for single-step responses with no tool calls, where `prepareStep` never fires |
| 15 | + |
| 16 | +## Basic usage |
| 17 | + |
| 18 | +Provide `shouldCompact` to decide when to compact and `summarize` to generate the summary: |
| 19 | + |
| 20 | +```ts |
| 21 | +import { chat } from "@trigger.dev/sdk/ai"; |
| 22 | +import { streamText, generateText } from "ai"; |
| 23 | +import { openai } from "@ai-sdk/openai"; |
| 24 | + |
| 25 | +export const myChat = chat.task({ |
| 26 | + id: "my-chat", |
| 27 | + compaction: { |
| 28 | + shouldCompact: ({ totalTokens }) => (totalTokens ?? 0) > 80_000, |
| 29 | + summarize: async ({ messages }) => { |
| 30 | + const result = await generateText({ |
| 31 | + model: openai("gpt-4o-mini"), |
| 32 | + messages: [...messages, { role: "user", content: "Summarize this conversation concisely." }], |
| 33 | + }); |
| 34 | + return result.text; |
| 35 | + }, |
| 36 | + }, |
| 37 | + run: async ({ messages, signal }) => { |
| 38 | + return streamText({ |
| 39 | + ...chat.toStreamTextOptions({ registry }), |
| 40 | + messages, |
| 41 | + abortSignal: signal, |
| 42 | + }); |
| 43 | + }, |
| 44 | +}); |
| 45 | +``` |
| 46 | + |
| 47 | +<Note> |
| 48 | + The `prepareStep` for inner-loop compaction is automatically injected when you spread `chat.toStreamTextOptions()` into your `streamText` call. If you provide your own `prepareStep` after the spread, it overrides the auto-injected one, so inner-loop compaction no longer runs unless your own `prepareStep` performs it (see "Low-level compaction" below). |
| 49 | +</Note> |
| 50 | + |
| 51 | +## How it works |
| 52 | + |
| 53 | +Each time a compaction check runs (between tool-call steps, or after a turn completes): |
| 54 | + |
| 55 | +1. `shouldCompact` is called with the current token usage |
| 56 | +2. If it returns `true`, `summarize` generates a summary from the model messages |
| 57 | +3. The **model messages** (sent to the LLM) are replaced with the summary |
| 58 | +4. The **UI messages** (persisted and displayed) are preserved by default |
| 59 | +5. The `onCompacted` hook fires if configured |
| 60 | + |
| 61 | +On the next turn, the LLM receives the compact summary instead of the full history — dramatically reducing token usage while preserving context. |
| 62 | + |
| 63 | +## Customizing what gets persisted |
| 64 | + |
| 65 | +By default, compaction only affects model messages — UI messages stay intact so users see the full conversation after a page refresh. You can customize this with `compactUIMessages`: |
| 66 | + |
| 67 | +### Summary + recent messages |
| 68 | + |
| 69 | +Replace older messages with a summary but keep the last few exchanges visible: |
| 70 | + |
| 71 | +```ts |
| 72 | +import { generateId } from "ai"; |
| 73 | + |
| 74 | +export const myChat = chat.task({ |
| 75 | + id: "my-chat", |
| 76 | + compaction: { |
| 77 | + shouldCompact: ({ totalTokens }) => (totalTokens ?? 0) > 80_000, |
| 78 | + summarize: async ({ messages }) => { |
| 79 | + return generateText({ |
| 80 | + model: openai("gpt-4o-mini"), |
| 81 | + messages: [...messages, { role: "user", content: "Summarize." }], |
| 82 | + }).then((r) => r.text); |
| 83 | + }, |
| 84 | + compactUIMessages: ({ uiMessages, summary }) => [ |
| 85 | + { |
| 86 | + id: generateId(), |
| 87 | + role: "assistant", |
| 88 | + parts: [{ type: "text", text: `[Conversation summary]\n\n${summary}` }], |
| 89 | + }, |
| 90 | + ...uiMessages.slice(-4), // Keep the last 4 messages |
| 91 | + ], |
| 92 | + }, |
| 93 | + run: async ({ messages, signal }) => { |
| 94 | + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); |
| 95 | + }, |
| 96 | +}); |
| 97 | +``` |
| 98 | + |
| 99 | +### Flatten to summary only |
| 100 | + |
| 101 | +Replace all messages with just the summary (matching what the LLM sees by default): |
| 102 | + |
| 103 | +```ts |
| 104 | +compactUIMessages: ({ summary }) => [ |
| 105 | + { |
| 106 | + id: generateId(), |
| 107 | + role: "assistant", |
| 108 | + parts: [{ type: "text", text: `[Conversation summary]\n\n${summary}` }], |
| 109 | + }, |
| 110 | +], |
| 111 | +``` |
| 112 | + |
| 113 | +## Customizing model messages |
| 114 | + |
| 115 | +By default, model messages are replaced with a single summary message. Use `compactModelMessages` to customize what the LLM sees after compaction: |
| 116 | + |
| 117 | +### Summary + recent context |
| 118 | + |
| 119 | +Keep the last few model messages so the LLM has recent detail alongside the summary: |
| 120 | + |
| 121 | +```ts |
| 122 | +compactModelMessages: ({ modelMessages, summary }) => [ |
| 123 | + { role: "user", content: summary }, |
| 124 | + ...modelMessages.slice(-2), // Keep last exchange for detail |
| 125 | +], |
| 126 | +``` |
| 127 | + |
| 128 | +### Keep tool results |
| 129 | + |
| 130 | +Preserve tool-call results so the LLM remembers what tools returned: |
| 131 | + |
| 132 | +```ts |
| 133 | +compactModelMessages: ({ modelMessages, summary }) => [ |
| 134 | + { role: "user", content: summary }, |
| 135 | + ...modelMessages.filter((m) => m.role === "tool"), |
| 136 | +], |
| 137 | +``` |
| 138 | + |
| 139 | +## shouldCompact event |
| 140 | + |
| 141 | +The `shouldCompact` callback receives context about the current state: |
| 142 | + |
| 143 | +| Field | Type | Description | |
| 144 | +|-------|------|-------------| |
| 145 | +| `messages` | `ModelMessage[]` | Current model messages | |
| 146 | +| `totalTokens` | `number \| undefined` | Total tokens from the triggering step/turn | |
| 147 | +| `inputTokens` | `number \| undefined` | Input tokens | |
| 148 | +| `outputTokens` | `number \| undefined` | Output tokens | |
| 149 | +| `usage` | `LanguageModelUsage` | Full usage object from the triggering step/turn | |
| 150 | +| `totalUsage` | `LanguageModelUsage` | Cumulative usage across all turns | |
| 151 | +| `chatId` | `string` | Chat session ID | |
| 152 | +| `turn` | `number` | Current turn (0-indexed) | |
| 153 | +| `clientData` | `unknown` | Custom data from the frontend | |
| 154 | +| `source` | `"inner" \| "outer"` | Whether this is between steps or between turns | |
| 155 | +| `steps` | `CompactionStep[]` | Steps array (inner loop only) | |
| 156 | +| `stepNumber` | `number` | Step index (inner loop only) | |
| 157 | + |
| 158 | +## summarize event |
| 159 | + |
| 160 | +The `summarize` callback receives similar context: |
| 161 | + |
| 162 | +| Field | Type | Description | |
| 163 | +|-------|------|-------------| |
| 164 | +| `messages` | `ModelMessage[]` | Messages to summarize | |
| 165 | +| `usage` | `LanguageModelUsage` | Usage from the triggering step/turn | |
| 166 | +| `totalUsage` | `LanguageModelUsage` | Cumulative usage | |
| 167 | +| `chatId` | `string` | Chat session ID | |
| 168 | +| `turn` | `number` | Current turn (0-indexed) | |
| 169 | +| `clientData` | `unknown` | Custom data from the frontend | |
| 170 | +| `source` | `"inner" \| "outer"` | Where compaction is running | |
| 171 | +| `stepNumber` | `number` | Step index (inner loop only) | |
| 172 | + |
| 173 | +## onCompacted hook |
| 174 | + |
| 175 | +Track compaction events for logging, billing, or analytics: |
| 176 | + |
| 177 | +```ts |
| 178 | +export const myChat = chat.task({ |
| 179 | + id: "my-chat", |
| 180 | + compaction: { ... }, |
| 181 | + onCompacted: async ({ summary, totalTokens, messageCount, chatId, turn }) => { |
| 182 | + logger.info("Compacted", { chatId, turn, totalTokens, messageCount }); |
| 183 | + await db.compactionLog.create({ |
| 184 | + data: { chatId, summary, totalTokens, messageCount }, |
| 185 | + }); |
| 186 | + }, |
| 187 | + run: async ({ messages, signal }) => { |
| 188 | + return streamText({ model: openai("gpt-4o"), messages, abortSignal: signal }); |
| 189 | + }, |
| 190 | +}); |
| 191 | +``` |
| 192 | + |
| 193 | +## Low-level compaction |
| 194 | + |
| 195 | +For `chat.createSession()` or raw task mode, either call `chat.compact()` directly inside a custom `prepareStep`, or pass `chat.compactionStep()` as the `prepareStep`. Calling `chat.compact()` directly looks like this: |
| 196 | + |
| 197 | +```ts |
| 198 | +const result = streamText({ |
| 199 | + model: openai("gpt-4o"), |
| 200 | + messages, |
| 201 | + prepareStep: async ({ messages: stepMessages, steps }) => { |
| 202 | + const result = await chat.compact(stepMessages, steps, { |
| 203 | + threshold: 80_000, |
| 204 | + summarize: async (msgs) => |
| 205 | + generateText({ model: openai("gpt-4o-mini"), messages: msgs }).then((r) => r.text), |
| 206 | + }); |
| 207 | + return result.type === "skipped" ? undefined : result; |
| 208 | + }, |
| 209 | +}); |
| 210 | +``` |
| 211 | + |
| 212 | +Or use the higher-level `chat.compactionStep()` factory: |
| 213 | + |
| 214 | +```ts |
| 215 | +const result = streamText({ |
| 216 | + model: openai("gpt-4o"), |
| 217 | + messages, |
| 218 | + prepareStep: chat.compactionStep({ |
| 219 | + threshold: 80_000, |
| 220 | + summarize: async (msgs) => |
| 221 | + generateText({ model: openai("gpt-4o-mini"), messages: msgs }).then((r) => r.text), |
| 222 | + }), |
| 223 | +}); |
| 224 | +``` |
| 225 | + |
| 226 | +<Note> |
| 227 | + The low-level APIs only handle inner-loop compaction (between tool-call steps). For full coverage including single-step turns, use the `compaction` option on `chat.task()`. |
| 228 | +</Note> |
0 commit comments