Skip to content

Commit 516c21e

Browse files
committed
feat(chat): add chat.inject() for background context injection and chat.defer improvements
- chat.inject(): queue model messages from background work for injection at the next prepareStep boundary, or before the next turn's run()
- Deferred work from onTurnComplete no longer blocks waiting for the next message
- Background queue persists across turns (not reset) so deferred work from onTurnComplete can inject into the next turn
- Reference app: self-review pattern using generateObject + chat.inject()
- Hide transient data-turn-status and data-background-context-injected parts in the UI
1 parent 518dc5d commit 516c21e

File tree

3 files changed

+129
-4
lines changed

3 files changed

+129
-4
lines changed

packages/trigger-sdk/src/v3/ai.ts

Lines changed: 70 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,13 @@ const stopInput = streams.input<{ stop: true; message?: string }>({ id: CHAT_STO
500500
*/
501501
const chatDeferKey = locals.create<Set<Promise<unknown>>>("chat.defer");
502502

503+
/**
 * Background context queue. Messages added via `chat.inject()` are drained at
 * the next `prepareStep` boundary (or immediately before the next turn's
 * `run()`) and appended to the model messages.
 *
 * NOTE: deliberately NOT reset at turn start — deferred work from a previous
 * turn's `onTurnComplete` must be able to inject into the next turn.
 * @internal
 */
const chatBackgroundQueueKey = locals.create<ModelMessage[]>("chat.backgroundQueue");
509+
503510
/**
504511
* Run-scoped pipe counter. Stored in locals so concurrent runs in the
505512
* same worker don't share state.
@@ -1458,11 +1465,11 @@ function toStreamTextOptions(options?: ToStreamTextOptionsOptions): Record<strin
14581465
const telemetry = prompt.toAISDKTelemetry(options?.telemetry);
14591466
Object.assign(result, telemetry);
14601467

1461-
// Auto-inject prepareStep when compaction or pendingMessages is configured.
1468+
// Auto-inject prepareStep for compaction, pending messages, and background context injection.
14621469
const taskCompaction = locals.get(chatTaskCompactionKey);
14631470
const taskPendingMessages = locals.get(chatPendingMessagesKey);
14641471

1465-
if (taskCompaction || taskPendingMessages) {
1472+
{
14661473
result.prepareStep = async ({ messages, steps }: { messages: ModelMessage[]; steps: CompactionStep[] }) => {
14671474
let resultMessages: ModelMessage[] | undefined;
14681475

@@ -1501,6 +1508,13 @@ function toStreamTextOptions(options?: ToStreamTextOptionsOptions): Record<strin
15011508
}
15021509
}
15031510

1511+
// 3. Background context injection
1512+
const bgQueue = locals.get(chatBackgroundQueueKey);
1513+
if (bgQueue && bgQueue.length > 0) {
1514+
const injected = bgQueue.splice(0); // drain
1515+
resultMessages = [...(resultMessages ?? messages), ...injected];
1516+
}
1517+
15041518
return resultMessages ? { messages: resultMessages } : undefined;
15051519
};
15061520
}
@@ -2324,6 +2338,9 @@ function chatTask<
23242338
locals.set(chatDeferKey, new Set());
23252339
locals.set(chatCompactionStateKey, undefined);
23262340
locals.set(chatSteeringQueueKey, []);
2341+
// NOTE: chatBackgroundQueueKey is NOT reset here — messages injected
2342+
// by deferred work from the previous turn's onTurnComplete need to
2343+
// survive into the next turn. The queue is drained before run().
23272344
locals.set(chatInjectedMessageIdsKey, new Set());
23282345

23292346
// Store chat context for auto-detection by ai.tool subtasks
@@ -2537,6 +2554,12 @@ function chatTask<
25372554
let runResult: unknown;
25382555

25392556
try {
2557+
// Drain any messages injected by background work (e.g. self-review from previous turn)
2558+
const bgQueue = locals.get(chatBackgroundQueueKey);
2559+
if (bgQueue && bgQueue.length > 0) {
2560+
accumulatedMessages.push(...bgQueue.splice(0));
2561+
}
2562+
25402563
runResult = await userRun({
25412564
...restWire,
25422565
messages: await applyPrepareMessages(accumulatedMessages, "run"),
@@ -2926,6 +2949,12 @@ function chatTask<
29262949
);
29272950
}
29282951

2952+
// NOTE: We intentionally do NOT await deferred work from onTurnComplete here.
2953+
// Promises deferred in onTurnComplete (e.g. background self-review via
2954+
// chat.defer + chat.inject) run during the idle wait. If they complete
2955+
// before the next message, their injected context is picked up in prepareStep.
2956+
// The pre-onBeforeTurnComplete drain handles promises from onTurnStart/run().
2957+
29292958
// If messages arrived during streaming (without pendingMessages config),
29302959
// use the first one immediately as the next turn.
29312960
if (pendingMessages.length > 0) {
@@ -3154,6 +3183,43 @@ function chatDefer(promise: Promise<unknown>): void {
31543183
}
31553184
}
31563185

3186+
// ---------------------------------------------------------------------------
3187+
// Background context injection
3188+
// ---------------------------------------------------------------------------
3189+
3190+
/**
3191+
* Queue model messages for injection at the next `prepareStep` boundary.
3192+
*
3193+
* Use this to inject context from background work into the agent's conversation.
3194+
* Messages are appended to the model messages before the next LLM inference call.
3195+
*
3196+
* Combine with `chat.defer()` to run background analysis and inject results:
3197+
*
3198+
* @example
3199+
* ```ts
3200+
* onTurnComplete: async ({ messages }) => {
3201+
* chat.defer((async () => {
3202+
* const review = await generateObject({
3203+
* model: openai("gpt-4o-mini"),
3204+
* messages: [...messages, { role: "user", content: "Review the last response." }],
3205+
* schema: z.object({ suggestions: z.array(z.string()) }),
3206+
* });
3207+
* if (review.object.suggestions.length > 0) {
3208+
* chat.inject([{
3209+
* role: "system",
3210+
* content: `Improvements for next response:\n${review.object.suggestions.join("\n")}`,
3211+
* }]);
3212+
* }
3213+
* })());
3214+
* },
3215+
* ```
3216+
*/
3217+
function injectBackgroundContext(messages: ModelMessage[]): void {
3218+
const queue = locals.get(chatBackgroundQueueKey) ?? [];
3219+
queue.push(...messages);
3220+
locals.set(chatBackgroundQueueKey, queue);
3221+
}
3222+
31573223
// ---------------------------------------------------------------------------
31583224
// Aborted message cleanup
31593225
// ---------------------------------------------------------------------------
@@ -4154,6 +4220,8 @@ export const chat = {
41544220
cleanupAbortedParts,
41554221
/** Register background work that runs in parallel with streaming. See {@link chatDefer}. */
41564222
defer: chatDefer,
4223+
/** Queue model messages for injection at the next `prepareStep` boundary. See {@link injectBackgroundContext}. */
4224+
inject: injectBackgroundContext,
41574225
/** Typed chat output stream for writing custom chunks or piping from subtasks. */
41584226
stream: chatStream,
41594227
/** Pre-built input stream for receiving messages from the transport. */

references/ai-chat/src/components/chat.tsx

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -476,6 +476,14 @@ export function Chat({
476476
);
477477
}
478478

479+
// Transient status parts — hide from rendered output
480+
if (
481+
part.type === "data-turn-status" ||
482+
part.type === "data-background-context-injected"
483+
) {
484+
return null;
485+
}
486+
479487
if (part.type === "data-research-progress") {
480488
return <ResearchProgress key={i} part={part} />;
481489
}

references/ai-chat/src/trigger/chat.ts

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { ai, chat, type ChatTaskWirePayload } from "@trigger.dev/sdk/ai";
22
import { logger, schemaTask, task, prompts } from "@trigger.dev/sdk";
3-
import { streamText, generateText, tool, dynamicTool, stepCountIs, generateId, createProviderRegistry } from "ai";
3+
import { streamText, generateText, generateObject, tool, dynamicTool, stepCountIs, generateId, createProviderRegistry } from "ai";
44
import type { LanguageModel, LanguageModelUsage, Tool as AITool, UIMessage } from "ai";
55
import { openai } from "@ai-sdk/openai";
66
import { anthropic } from "@ai-sdk/anthropic";
@@ -56,6 +56,20 @@ When the user asks you to research a topic, use the deep research tool with rele
5656
- Keep responses under a few paragraphs unless the user asks for more.`,
5757
});
5858

59+
// Prompt for the background self-review pass: a cheap model critiques the
// assistant's previous response so the critique can be injected into the next
// turn via chat.inject() (see the chat.defer call in onTurnComplete below).
const selfReviewPrompt = prompts.define({
  id: "ai-chat-self-review",
  model: "openai:gpt-4o-mini",
  content: `You are a conversation quality reviewer. Analyze the assistant's most recent response and provide structured feedback.

Focus on:
- Whether the response actually answered the user's question
- Missed opportunities to use tools or provide more detail
- Tone mismatches (too formal, too casual, etc.)
- Factual claims that should have been verified with tools

Be concise. Only flag issues worth fixing — don't nitpick.`,
});
72+
5973
const MODELS: Record<string, () => LanguageModel> = {
6074
"gpt-4o-mini": () => openai("gpt-4o-mini"),
6175
"gpt-4o": () => openai("gpt-4o"),
@@ -437,7 +451,7 @@ export const aiChat = chat.task({
437451
// Deferred — runs in parallel with streaming, awaited before onTurnComplete.
438452
chat.defer(prisma.chat.update({ where: { id: chatId }, data: { messages: uiMessages as any } }));
439453
},
440-
onTurnComplete: async ({ chatId, uiMessages, runId, chatAccessToken, lastEventId }) => {
454+
onTurnComplete: async ({ chatId, uiMessages, messages, runId, chatAccessToken, lastEventId }) => {
441455
// Persist final messages + assistant response + stream position
442456
await prisma.chat.update({
443457
where: { id: chatId },
@@ -459,6 +473,41 @@ export const aiChat = chat.task({
459473
},
460474
});
461475
}
476+
477+
// Background self-review — a cheap model critiques the response and injects
478+
// coaching into the conversation before the next user message arrives.
479+
chat.defer((async () => {
480+
const resolved = await selfReviewPrompt.resolve({});
481+
482+
const review = await generateObject({
483+
model: registry.languageModel(resolved.model ?? "openai:gpt-4o-mini"),
484+
...resolved.toAISDKTelemetry(),
485+
system: resolved.text,
486+
prompt: `Here is the conversation to review:\n\n${messages.filter((m) => m.role === "user" || m.role === "assistant").map((m) => `${m.role}: ${typeof m.content === "string" ? m.content : Array.isArray(m.content) ? m.content.filter((p: any) => p.type === "text").map((p: any) => p.text).join("") : ""}`).join("\n\n")}`,
487+
schema: z.object({
488+
needsImprovement: z.boolean().describe("Whether the response needs improvement"),
489+
suggestions: z.array(z.string()).describe("Specific actionable suggestions for the next response"),
490+
missedTools: z.array(z.string()).describe("Tool names the assistant should have used but didn't"),
491+
}),
492+
});
493+
494+
const parts = [];
495+
if (review.object.suggestions.length > 0) {
496+
parts.push(`Suggestions:\n${review.object.suggestions.map((s) => `- ${s}`).join("\n")}`);
497+
}
498+
if (review.object.missedTools.length > 0) {
499+
parts.push(`Consider using: ${review.object.missedTools.join(", ")}`);
500+
}
501+
502+
chat.inject([
503+
{
504+
role: "system" as const,
505+
content: review.object.needsImprovement
506+
? `[Self-review of your previous response]\n\n${parts.join("\n\n")}\n\nApply these improvements naturally in your next response.`
507+
: `[Self-review of your previous response]\n\nYour previous response was good. No changes needed.`,
508+
},
509+
]);
510+
})());
462511
},
463512
run: async ({ messages, clientData, stopSignal }) => {
464513
// Track usage

0 commit comments

Comments
 (0)