ChatLunaLab · dingyi222666 · May 24, 2026 · May 23, 2026 · May 23, 2026 · May 23, 2026
diff --git a/packages/core/src/llm-core/agent/legacy-executor.ts b/packages/core/src/llm-core/agent/legacy-executor.ts
@@ -1,5 +1,10 @@
 import { CallbackManagerForChainRun } from '@langchain/core/callbacks/manager'
-import { AIMessage, AIMessageChunk } from '@langchain/core/messages'
+import {
+    AIMessage,
+    AIMessageChunk,
+    BaseMessage,
+    HumanMessage
+} from '@langchain/core/messages'
 import { isDirectToolOutput } from '@langchain/core/messages/tool'
 import { OutputParserException } from '@langchain/core/output_parsers'
 import {
@@ -34,6 +39,8 @@ import {
     MessageQueue,
     ScratchpadEntry
 } from './types'
+import { compressChunk } from '../chain/infinite_context_chain'
+import type { ChatLunaChatModel } from '../platform/model'
 
 async function executeTools(
     actions: AgentAction[],
@@ -289,6 +296,29 @@ export async function* runAgent(
             }
         }
 
+        // Compress scratchpad if input tokens are approaching context limit
+        const model = config?.configurable?.['model'] as
+            | ChatLunaChatModel
+            | undefined
+        if (model && scratchpad.length > 6) {
+            // Get input_tokens from the AI message that triggered tool calls
+            const aiMsg = output[0]?.['messageLog']?.[0] as
+                | AIMessage
+                | undefined
+            const inputTokens = (aiMsg as AIMessage)?.usage_metadata
+                ?.input_tokens
+            if (inputTokens > 0) {
+                await compressScratchpad(
+                    scratchpad,
+                    options.input,
+                    model,
+                    config?.configurable?.['conversationId'] ?? '',
+                    inputTokens,
+                    signal
+                )
+            }
+        }
+
         const last = newSteps[newSteps.length - 1]
         const tool = last ? toolMap[last.action.tool?.toLowerCase()] : undefined
 
@@ -345,6 +375,102 @@ export async function* runAgent(
     }
 }
 
+/**
+ * Compress scratchpad when input tokens approach context limit.
+ * Summarizes early scratchpad + chat_history, keeps recent entries.
+ */
+async function compressScratchpad(
+    scratchpad: ScratchpadEntry[],
+    input: ChainValues,
+    model: ChatLunaChatModel,
+    conversationId: string,
+    inputTokens: number,
+    signal?: AbortSignal
+): Promise<void> {
+    const invocation = model.invocationParams()
+    const limit =
+        invocation.maxTokenLimit && invocation.maxTokenLimit > 0
+            ? invocation.maxTokenLimit
+            : model.getModelMaxContextSize()
+
+    if (!limit || limit <= 0 || inputTokens < limit * 0.85) return
+
+    logger.info(
+        '[ScratchpadCompress] %d tokens exceed 85%% of %d, compressing',
+        inputTokens,
+        limit
+    )
+
+    const keepCount = Math.min(3, scratchpad.length)
+    const toCompress = scratchpad.slice(0, scratchpad.length - keepCount)
+    if (toCompress.length === 0) return
+
+    const chatHistory = (input['chat_history'] ?? []) as BaseMessage[]
+    const chatPart = chatHistory
+        .map((msg) => {
+            const content =
+                typeof msg.content === 'string'
+                    ? msg.content.trim()
+                    : JSON.stringify(msg.content)
+            return `[${msg.getType().toUpperCase()}${msg.name ? ` (${msg.name})` : ''}]\n${content || '(empty)'}`
+        })
+        .join('\n\n---\n\n')
+
+    const scratchPart = toCompress
+        .map((entry) => {
+            if ('messages' in entry) {
+                return entry.messages
+                    .map((m) => {
+                        const c =
+                            typeof m.content === 'string'
+                                ? m.content.trim()
+                                : JSON.stringify(m.content)
+                        return `[HUMAN]\n${c}`
+                    })
+                    .join('\n\n---\n\n')
+            }
+            const inp =
+                typeof entry.action.toolInput === 'string'
+                    ? entry.action.toolInput
+                    : JSON.stringify(entry.action.toolInput)
+            const obs = observationToMessageContent(entry.observation)
+            return `[AI Tool Call: ${entry.action.tool}]\n${inp.slice(0, 300)}\n\n[TOOL Result]\n${obs.slice(0, 500)}`
+        })
+        .join('\n\n---\n\n')
+
+    const transcript = chatPart
+        ? `${chatPart}\n\n---\n\n${scratchPart}`
+        : scratchPart
+    if (!transcript.trim()) return
+
+    try {
+        const summary = await compressChunk(
+            model,
+            transcript,
+            conversationId,
+            signal
+        )
+        if (!summary?.text.trim()) return
+
+        input['chat_history'] = [
+            new HumanMessage({
+                content: summary.text.trim(),
+                name: 'infinite_context',
+                additional_kwargs: { source: 'scratchpad-compression' }
+            })
+        ]
+        scratchpad.splice(0, scratchpad.length - keepCount)
+
+        logger.info(
+            '[ScratchpadCompress] Compressed %d entries, kept %d',
+            toCompress.length,
+            keepCount
+        )
+    } catch (e) {
+        logger.error('[ScratchpadCompress] Failed:', e)
+    }
+}
+
 export async function emitAgentEvent(
     runManager: CallbackManagerForChainRun | undefined,
     configurable: AgentRuntimeConfigurable,

diff --git a/packages/core/src/llm-core/chain/infinite_context_chain.ts b/packages/core/src/llm-core/chain/infinite_context_chain.ts
@@ -1,57 +1,16 @@
-import { ChainValues } from '@langchain/core/utils/types'
 import { PromptTemplate } from '@langchain/core/prompts'
 import { AIMessage, type UsageMetadata } from '@langchain/core/messages'
-import { BufferMemory } from 'koishi-plugin-chatluna/llm-core/memory/langchain'
-import {
-    ChatLunaLLMCallArg,
-    ChatLunaLLMChain,
-    ChatLunaLLMChainWrapper
-} from 'koishi-plugin-chatluna/llm-core/chain/base'
+import { ChatLunaLLMChain } from 'koishi-plugin-chatluna/llm-core/chain/base'
 import { ChatLunaChatModel } from 'koishi-plugin-chatluna/llm-core/platform/model'
-import {
-    ChatLunaError,
-    ChatLunaErrorCode
-} from 'koishi-plugin-chatluna/utils/error'
 import { getMessageContent } from 'koishi-plugin-chatluna/utils/string'
 
-export interface ChatLunaInfiniteContextChainInput {
-    historyMemory: BufferMemory
-}
-
-export interface ChatLunaInfiniteContextChunkArg {
-    chunk: string
-    conversationId: string
-    signal?: AbortSignal
-}
-
-export interface ChatLunaInfiniteContextChunkResult {
+export interface CompressChunkResult {
     text: string
     usageMetadata?: UsageMetadata
 }
 
-export class ChatLunaInfiniteContextChain
-    extends ChatLunaLLMChainWrapper
-    implements ChatLunaInfiniteContextChainInput
-{
-    historyMemory: BufferMemory
-
-    private chain: ChatLunaLLMChain
-
-    constructor({
-        historyMemory,
-        chain
-    }: ChatLunaInfiniteContextChainInput & { chain: ChatLunaLLMChain }) {
-        super()
-        this.historyMemory = historyMemory
-        this.chain = chain
-    }
-
-    static fromLLM(
-        llm: ChatLunaChatModel,
-        { historyMemory }: ChatLunaInfiniteContextChainInput
-    ) {
-        const prompt =
-            PromptTemplate.fromTemplate(`You are a helpful AI assistant tasked with summarizing conversations.
+const COMPRESS_PROMPT =
+    PromptTemplate.fromTemplate(`You are a helpful AI assistant tasked with summarizing conversations.
 
 When asked to summarize, provide a detailed but concise summary of the conversation.
 Focus on information that would be helpful for continuing the conversation, including:
@@ -61,6 +20,7 @@ Focus on information that would be helpful for continuing the conversation, incl
 - What needs to be done next
 - Key user requests, constraints, or preferences that should persist
 - Important technical decisions and why they were made
+- Tool calls that were made and their results (summarize the key outcomes)
 
 Some old tool result messages may say that the original tool output expired and was removed.
 Treat those as intentional retention placeholders, not as meaningful tool output.
@@ -72,72 +32,39 @@ Do not respond to any questions in the conversation, only output the summary.
 Conversation:
 {conversation_chunk}`)
 
-        const chain = new ChatLunaLLMChain({ llm, prompt })
+export async function compressChunk(
+    model: ChatLunaChatModel,
+    transcript: string,
+    conversationId: string,
+    signal?: AbortSignal
+): Promise<CompressChunkResult | null> {
+    const trimmed = transcript?.trim()
 
-        return new ChatLunaInfiniteContextChain({
-            historyMemory,
-            chain
-        })
+    if (!trimmed) {
+        return null
     }
 
-    async compressChunk({
-        chunk,
-        conversationId,
-        signal
-    }: ChatLunaInfiniteContextChunkArg): Promise<ChatLunaInfiniteContextChunkResult | null> {
-        const trimmedChunk = chunk?.trim()
-
-        if (!trimmedChunk) {
-            return null
-        }
-
-        const result = await this.chain.invoke({
-            conversation_chunk: trimmedChunk,
-            id: conversationId,
-            stream: false,
-            signal
-        })
-
-        const rawMessage = (result['message'] ?? null) as AIMessage | null
+    const chain = new ChatLunaLLMChain({ llm: model, prompt: COMPRESS_PROMPT })
 
-        const text =
-            (result['text'] ?? '').toString().trim() ||
-            (rawMessage ? getMessageContent(rawMessage.content).trim() : '')
-
-        if (!text) {
-            return null
-        }
-
-        return {
-            text,
-            usageMetadata: rawMessage?.usage_metadata
-        }
-    }
+    const result = await chain.invoke({
+        conversation_chunk: trimmed,
+        id: conversationId,
+        stream: false,
+        signal
+    })
 
-    async call(
-        arg: ChatLunaLLMCallArg & { chunk?: string }
-    ): Promise<ChainValues> {
-        const chunk = arg['chunk'] ?? getMessageContent(arg.message.content)
+    const rawMessage = (result['message'] ?? null) as AIMessage | null
 
-        if (!chunk?.trim()) {
-            throw new ChatLunaError(
-                ChatLunaErrorCode.UNKNOWN_ERROR,
-                new Error(
-                    'Empty context chunk passed to Infinite Context chain'
-                )
-            )
-        }
+    const text =
+        (result['text'] ?? '').toString().trim() ||
+        (rawMessage ? getMessageContent(rawMessage.content).trim() : '')
 
-        return this.chain.invoke({
-            conversation_chunk: chunk,
-            id: arg.conversationId,
-            stream: arg.stream,
-            signal: arg.signal,
-            maxTokens: arg.maxToken
-        })
+    if (!text) {
+        return null
     }
 
-    get model(): ChatLunaChatModel {
-        return this.chain.llm
+    return {
+        text,
+        usageMetadata: rawMessage?.usage_metadata
     }
 }