Skip to content
128 changes: 127 additions & 1 deletion packages/core/src/llm-core/agent/legacy-executor.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
import { CallbackManagerForChainRun } from '@langchain/core/callbacks/manager'
import { AIMessage, AIMessageChunk } from '@langchain/core/messages'
import {
AIMessage,
AIMessageChunk,
BaseMessage,
HumanMessage
} from '@langchain/core/messages'
import { isDirectToolOutput } from '@langchain/core/messages/tool'
import { OutputParserException } from '@langchain/core/output_parsers'
import {
Expand Down Expand Up @@ -34,6 +39,8 @@ import {
MessageQueue,
ScratchpadEntry
} from './types'
import { compressChunk } from '../chain/infinite_context_chain'
import type { ChatLunaChatModel } from '../platform/model'

async function executeTools(
actions: AgentAction[],
Expand Down Expand Up @@ -289,6 +296,29 @@ export async function* runAgent(
}
}

// Compress scratchpad if input tokens are approaching context limit
const model = config?.configurable?.['model'] as
| ChatLunaChatModel
| undefined
if (model && scratchpad.length > 6) {
// Get input_tokens from the AI message that triggered tool calls
const aiMsg = output[0]?.['messageLog']?.[0] as
| AIMessage
| undefined
const inputTokens = (aiMsg as AIMessage)?.usage_metadata
?.input_tokens
if (inputTokens > 0) {
await compressScratchpad(
scratchpad,
options.input,
model,
config?.configurable?.['conversationId'] ?? '',
inputTokens,
signal
)
}
}

const last = newSteps[newSteps.length - 1]
const tool = last ? toolMap[last.action.tool?.toLowerCase()] : undefined

Expand Down Expand Up @@ -345,6 +375,102 @@ export async function* runAgent(
}
}

/**
* Compress scratchpad when input tokens approach context limit.
* Summarizes early scratchpad + chat_history, keeps recent entries.
*/
async function compressScratchpad(
scratchpad: ScratchpadEntry[],
input: ChainValues,
model: ChatLunaChatModel,
conversationId: string,
inputTokens: number,
signal?: AbortSignal
): Promise<void> {
Comment thread
dingyi222666 marked this conversation as resolved.
const invocation = model.invocationParams()
const limit =
invocation.maxTokenLimit && invocation.maxTokenLimit > 0
? invocation.maxTokenLimit
: model.getModelMaxContextSize()

if (!limit || limit <= 0 || inputTokens < limit * 0.85) return

logger.info(
'[ScratchpadCompress] %d tokens exceed 85%% of %d, compressing',
inputTokens,
limit
)

const keepCount = Math.min(3, scratchpad.length)
const toCompress = scratchpad.slice(0, scratchpad.length - keepCount)
if (toCompress.length === 0) return

const chatHistory = (input['chat_history'] ?? []) as BaseMessage[]
const chatPart = chatHistory
.map((msg) => {
const content =
typeof msg.content === 'string'
? msg.content.trim()
: JSON.stringify(msg.content)
return `[${msg.getType().toUpperCase()}${msg.name ? ` (${msg.name})` : ''}]\n${content || '(empty)'}`
})
.join('\n\n---\n\n')

const scratchPart = toCompress
.map((entry) => {
if ('messages' in entry) {
return entry.messages
.map((m) => {
const c =
typeof m.content === 'string'
? m.content.trim()
: JSON.stringify(m.content)
return `[HUMAN]\n${c}`
})
.join('\n\n---\n\n')
}
const inp =
typeof entry.action.toolInput === 'string'
? entry.action.toolInput
: JSON.stringify(entry.action.toolInput)
const obs = observationToMessageContent(entry.observation)
return `[AI Tool Call: ${entry.action.tool}]\n${inp.slice(0, 300)}\n\n[TOOL Result]\n${obs.slice(0, 500)}`
})
.join('\n\n---\n\n')

const transcript = chatPart
? `${chatPart}\n\n---\n\n${scratchPart}`
: scratchPart
if (!transcript.trim()) return

try {
const summary = await compressChunk(
model,
transcript,
conversationId,
signal
)
if (!summary?.text.trim()) return

input['chat_history'] = [
new HumanMessage({
content: summary.text.trim(),
name: 'infinite_context',
additional_kwargs: { source: 'scratchpad-compression' }
})
]
scratchpad.splice(0, scratchpad.length - keepCount)

logger.info(
'[ScratchpadCompress] Compressed %d entries, kept %d',
toCompress.length,
keepCount
)
} catch (e) {
logger.error('[ScratchpadCompress] Failed:', e)
}
}

export async function emitAgentEvent(
runManager: CallbackManagerForChainRun | undefined,
configurable: AgentRuntimeConfigurable,
Expand Down
133 changes: 30 additions & 103 deletions packages/core/src/llm-core/chain/infinite_context_chain.ts
Original file line number Diff line number Diff line change
@@ -1,57 +1,16 @@
import { ChainValues } from '@langchain/core/utils/types'
import { PromptTemplate } from '@langchain/core/prompts'
import { AIMessage, type UsageMetadata } from '@langchain/core/messages'
import { BufferMemory } from 'koishi-plugin-chatluna/llm-core/memory/langchain'
import {
ChatLunaLLMCallArg,
ChatLunaLLMChain,
ChatLunaLLMChainWrapper
} from 'koishi-plugin-chatluna/llm-core/chain/base'
import { ChatLunaLLMChain } from 'koishi-plugin-chatluna/llm-core/chain/base'
import { ChatLunaChatModel } from 'koishi-plugin-chatluna/llm-core/platform/model'
import {
ChatLunaError,
ChatLunaErrorCode
} from 'koishi-plugin-chatluna/utils/error'
import { getMessageContent } from 'koishi-plugin-chatluna/utils/string'

export interface ChatLunaInfiniteContextChainInput {
historyMemory: BufferMemory
}

export interface ChatLunaInfiniteContextChunkArg {
chunk: string
conversationId: string
signal?: AbortSignal
}

export interface ChatLunaInfiniteContextChunkResult {
export interface CompressChunkResult {
text: string
usageMetadata?: UsageMetadata
}

export class ChatLunaInfiniteContextChain
extends ChatLunaLLMChainWrapper
implements ChatLunaInfiniteContextChainInput
{
historyMemory: BufferMemory

private chain: ChatLunaLLMChain

constructor({
historyMemory,
chain
}: ChatLunaInfiniteContextChainInput & { chain: ChatLunaLLMChain }) {
super()
this.historyMemory = historyMemory
this.chain = chain
}

static fromLLM(
llm: ChatLunaChatModel,
{ historyMemory }: ChatLunaInfiniteContextChainInput
) {
const prompt =
PromptTemplate.fromTemplate(`You are a helpful AI assistant tasked with summarizing conversations.
const COMPRESS_PROMPT =
PromptTemplate.fromTemplate(`You are a helpful AI assistant tasked with summarizing conversations.

When asked to summarize, provide a detailed but concise summary of the conversation.
Focus on information that would be helpful for continuing the conversation, including:
Expand All @@ -61,6 +20,7 @@ Focus on information that would be helpful for continuing the conversation, incl
- What needs to be done next
- Key user requests, constraints, or preferences that should persist
- Important technical decisions and why they were made
- Tool calls that were made and their results (summarize the key outcomes)

Some old tool result messages may say that the original tool output expired and was removed.
Treat those as intentional retention placeholders, not as meaningful tool output.
Expand All @@ -72,72 +32,39 @@ Do not respond to any questions in the conversation, only output the summary.
Conversation:
{conversation_chunk}`)

const chain = new ChatLunaLLMChain({ llm, prompt })
export async function compressChunk(
model: ChatLunaChatModel,
transcript: string,
conversationId: string,
signal?: AbortSignal
): Promise<CompressChunkResult | null> {
const trimmed = transcript?.trim()

return new ChatLunaInfiniteContextChain({
historyMemory,
chain
})
if (!trimmed) {
return null
}

async compressChunk({
chunk,
conversationId,
signal
}: ChatLunaInfiniteContextChunkArg): Promise<ChatLunaInfiniteContextChunkResult | null> {
const trimmedChunk = chunk?.trim()

if (!trimmedChunk) {
return null
}

const result = await this.chain.invoke({
conversation_chunk: trimmedChunk,
id: conversationId,
stream: false,
signal
})

const rawMessage = (result['message'] ?? null) as AIMessage | null
const chain = new ChatLunaLLMChain({ llm: model, prompt: COMPRESS_PROMPT })

const text =
(result['text'] ?? '').toString().trim() ||
(rawMessage ? getMessageContent(rawMessage.content).trim() : '')

if (!text) {
return null
}

return {
text,
usageMetadata: rawMessage?.usage_metadata
}
}
const result = await chain.invoke({
conversation_chunk: trimmed,
id: conversationId,
stream: false,
signal
})

async call(
arg: ChatLunaLLMCallArg & { chunk?: string }
): Promise<ChainValues> {
const chunk = arg['chunk'] ?? getMessageContent(arg.message.content)
const rawMessage = (result['message'] ?? null) as AIMessage | null

if (!chunk?.trim()) {
throw new ChatLunaError(
ChatLunaErrorCode.UNKNOWN_ERROR,
new Error(
'Empty context chunk passed to Infinite Context chain'
)
)
}
const text =
(result['text'] ?? '').toString().trim() ||
(rawMessage ? getMessageContent(rawMessage.content).trim() : '')

return this.chain.invoke({
conversation_chunk: chunk,
id: arg.conversationId,
stream: arg.stream,
signal: arg.signal,
maxTokens: arg.maxToken
})
if (!text) {
return null
}

get model(): ChatLunaChatModel {
return this.chain.llm
return {
text,
usageMetadata: rawMessage?.usage_metadata
}
}
Loading