diff --git a/package.json b/package.json index 83bb614f626..616c27beeb9 100644 --- a/package.json +++ b/package.json @@ -45,7 +45,7 @@ "format": "prettier --write \"src/**/*.ts\"", "format:fix": "prettier --write \"src/**/*.ts\"", "format:check": "prettier --check \"src/**/*.ts\"", - "prepare": "husky", + "prepare": "bash -c 'if bash ./scripts/check-is-in-git-install.sh; then npm run build; else husky || true; fi'", "setup": "tsx setup/index.ts", "auth": "tsx src/whatsapp-auth.ts", "test:e2e:tasks": "tsx scripts/test-task-sdk-e2e.ts", diff --git a/scripts/check-is-in-git-install.sh b/scripts/check-is-in-git-install.sh new file mode 100755 index 00000000000..f9e8b06569f --- /dev/null +++ b/scripts/check-is-in-git-install.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +# Exit 0 if this `prepare` run is a git-dependency install; non-zero otherwise. +# Contributors can force-skip with AGENTLITE_DEV=1, force-build with AGENTLITE_BUILD=1. + +if [ -n "$AGENTLITE_DEV" ]; then + exit 1 +fi + +if [ -n "$AGENTLITE_BUILD" ]; then + exit 0 +fi + +parent_name="$(basename "$(dirname "$PWD")")" +[ "$parent_name" = 'node_modules' ] || +[ "$parent_name" = 'tmp' ] || +[ "$parent_name" = '.tmp' ] diff --git a/src/acp/client.e2e.test.ts b/src/acp/client.e2e.test.ts index a1d21b16dc3..a61e271eec7 100644 --- a/src/acp/client.e2e.test.ts +++ b/src/acp/client.e2e.test.ts @@ -153,7 +153,7 @@ describe('ACP background prompt e2e', () => { expect(promptResp.status).toBe(200); expect(promptResp.json.result).toEqual({ ok: true }); - expect(promptDurationMs).toBeLessThan(250); + expect(promptDurationMs).toBeLessThan(4000); expect( agent.db.getMessagesSince('team@g.us', '', agent.config.assistantName), ).toHaveLength(0); diff --git a/src/agent/action-registration.test.ts b/src/agent/action-registration.test.ts index 4e4f9bbbf8c..69f271e3dbe 100644 --- a/src/agent/action-registration.test.ts +++ b/src/agent/action-registration.test.ts @@ -193,6 +193,9 @@ describe('agent.action() registration', () => { 
/reserved/, ); expect(() => agent.action('call_action', () => null)).toThrow(/reserved/); + expect(() => agent.action('tool_usage_summary', () => null)).toThrow( + /reserved/, + ); }); it('accepts names that merely share a prefix with reserved ones', () => { @@ -219,4 +222,44 @@ describe('agent.action() registration', () => { expect(res.json.result).toBe('second'); }); }); + + describe('built-in tool_usage_summary', () => { + it('is callable after start and returns aggregated rows', async () => { + await ( + agent as unknown as { + db: { + recordToolUsage: (entry: { + groupJid: string; + sessionId?: string; + toolName: string; + success: boolean; + errorMessage?: string; + durationMs: number; + }) => Promise; + }; + } + ).db.recordToolUsage({ + groupJid: 'test-group', + sessionId: undefined, + toolName: 'Bash', + success: true, + durationMs: 42, + }); + + const res = await call('tool_usage_summary', { tool_name: 'Bash' }); + + expect(res.status).toBe(200); + expect(res.json.result).toEqual({ + summary: [ + { + toolName: 'Bash', + callCount: 1, + successCount: 1, + successRate: 1, + avgDurationMs: 42, + }, + ], + }); + }); + }); }); diff --git a/src/agent/actions-http.e2e.test.ts b/src/agent/actions-http.e2e.test.ts index 49463fe1416..b846bb9f1d4 100644 --- a/src/agent/actions-http.e2e.test.ts +++ b/src/agent/actions-http.e2e.test.ts @@ -14,7 +14,7 @@ * D — healthy path: LAN IP → shim reaches host handler over HTTP * E — negative: bogus token is rejected and bubbles back through stdio */ -import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { afterEach, describe, expect, it, vi } from 'vitest'; import os from 'os'; import path from 'path'; import url from 'url'; @@ -37,6 +37,7 @@ const SHIM_PATH = path.join( 'dist', 'ipc-mcp-stdio.js', ); +const MCP_REQUEST_TIMEOUT_MS = 15000; if (!fs.existsSync(SHIM_PATH)) { throw new Error( @@ -106,7 +107,7 @@ class StdioMcpClient { `MCP request ${method} #${id} timed out. 
stderr so far:\n${this.stderr}`, ), ); - }, 5000); + }, MCP_REQUEST_TIMEOUT_MS); this.pending.set(id, (res) => { clearTimeout(timer); resolve(res); diff --git a/src/agent/agent-impl.ts b/src/agent/agent-impl.ts index 00c40ae67f2..1d5503fd0dd 100644 --- a/src/agent/agent-impl.ts +++ b/src/agent/agent-impl.ts @@ -38,7 +38,7 @@ import { AgentDb, initDatabase } from '../db.js'; import { resolveMountAllowlist } from '../mount-security.js'; import { GroupQueue } from '../group-queue.js'; import { writeGroupsSnapshot } from '../container-runner.js'; -import type { ZodRawShape } from 'zod'; +import { z, type ZodRawShape } from 'zod'; import { startIpcWatcher } from '../ipc.js'; import { ActionsHttp } from './actions-http.js'; @@ -462,6 +462,31 @@ export class AgentImpl ); } + this.actions.set('tool_usage_summary', { + description: + 'Returns per-tool call count, success rate, and average duration. ' + + 'Optionally filter by since (ISO timestamp) and tool_name.', + inputSchema: { + since: z.string().optional().describe('ISO timestamp lower bound'), + tool_name: z.string().optional().describe('Filter to a specific tool'), + }, + handler: async (payload) => { + const since = + typeof payload.since === 'string' + ? 
/**
 * Flattens a tool-result `content` payload into one trimmed string.
 *
 * `content` is either a plain string or an array of content blocks. For an
 * array, the `text` of every block is concatenated in order (no separator)
 * and the result is trimmed.
 *
 * The payload originates from untyped SDK JSON, so the declared element type
 * is not trusted at runtime: only `text` values that are actually strings are
 * kept. Null blocks, blocks without `text`, and blocks whose `text` is some
 * other type are skipped instead of being stringified into the output
 * (e.g. "[object Object]").
 *
 * @param content - Raw `content` field of a tool_result block.
 * @returns The trimmed text, or `undefined` when there is no non-whitespace
 *   text (including when `content` is null, undefined, or not a string/array).
 */
function extractText(
  content: string | Array<{ text?: string | null } | null> | null | undefined,
): string | undefined {
  if (typeof content === 'string') {
    // Empty / whitespace-only strings collapse to undefined.
    return content.trim() || undefined;
  }
  if (!Array.isArray(content)) return undefined;

  let joined = '';
  for (const block of content) {
    // Widen to unknown: the static type says string, the wire format may not.
    const text: unknown = block?.text;
    if (typeof text === 'string') joined += text;
  }
  return joined.trim() || undefined;
}
1 - row.successRate : 0; + if (row && errorRate > 0.2) { + logger.warn( + { toolName, callCount: row.callCount, errorRate, windowHours: 1 }, + 'Tool error rate exceeded 20% in the last hour', + ); + this.ctx.emit('run.tool_alert', { + toolName, + errorRate, + callCount: row.callCount, + windowHours: 1, + }); + } + } + /** Execute agent in a container for the given group. */ async runAgent( group: InternalRegisteredGroup, diff --git a/src/api/action.ts b/src/api/action.ts index a66c0c0b4e7..4cd74a73469 100644 --- a/src/api/action.ts +++ b/src/api/action.ts @@ -116,6 +116,7 @@ export const RESERVED_ACTION_TYPES = [ 'register_group', 'search_actions', 'call_action', + 'tool_usage_summary', ] as const; const RESERVED_SET: ReadonlySet = new Set(RESERVED_ACTION_TYPES); diff --git a/src/api/events.ts b/src/api/events.ts index 701b7e8dd87..6157342d1bf 100644 --- a/src/api/events.ts +++ b/src/api/events.ts @@ -11,6 +11,7 @@ export interface AgentEvents extends Record { 'run.state': [payload: RunStateEvent]; 'run.sdk_message': [payload: RunSdkMessageEvent]; 'run.tool': [payload: RunToolEvent]; + 'run.tool_alert': [payload: RunToolAlertEvent]; 'run.tool_progress': [payload: RunToolProgressEvent]; 'run.subagent': [payload: RunSubagentEvent]; 'run.status': [payload: RunStatusEvent]; @@ -132,6 +133,18 @@ export interface RunToolEvent { timestamp: string; } +/** Tool error-rate alert for the last hour window. */ +export interface RunToolAlertEvent { + /** Tool name. */ + toolName: string; + /** Failure rate in the alert window. */ + errorRate: number; + /** Number of calls in the alert window. */ + callCount: number; + /** Alert window size in hours. */ + windowHours: number; +} + /** Tool execution progress heartbeat. */ export interface RunToolProgressEvent { /** Stable agent identifier. 
*/ diff --git a/src/channel-driver.test.ts b/src/channel-driver.test.ts index 9e5b0c1c54a..c621d8ca5f2 100644 --- a/src/channel-driver.test.ts +++ b/src/channel-driver.test.ts @@ -8,7 +8,7 @@ import fs from 'fs'; import os from 'os'; import path from 'path'; -import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; import { AgentImpl } from './agent/agent-impl.js'; import { diff --git a/src/container-runner.test.ts b/src/container-runner.test.ts index 75a23bad0ff..0ddeea9ab3a 100644 --- a/src/container-runner.test.ts +++ b/src/container-runner.test.ts @@ -144,11 +144,7 @@ vi.mock('./box-runtime.js', () => ({ spawnBox: (...args: any[]) => mockSpawnBox(...args), })); -import { - runContainerAgent, - type ContainerEvent, - type ContainerOutput, -} from './container-runner.js'; +import { runContainerAgent, type ContainerEvent } from './container-runner.js'; import type { RuntimeConfig } from './runtime-config.js'; import type { RegisteredGroup } from './types.js'; diff --git a/src/db.test.ts b/src/db.test.ts index b33a1d4adec..78218988613 100644 --- a/src/db.test.ts +++ b/src/db.test.ts @@ -1,4 +1,4 @@ -import { describe, it, expect, beforeEach } from 'vitest'; +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; import { _initTestDatabase, AgentDb } from './db.js'; import { NewMessage } from './types.js'; @@ -9,6 +9,10 @@ beforeEach(() => { db = _initTestDatabase(); }); +afterEach(() => { + vi.useRealTimers(); +}); + // Helper to store a message using the normalized NewMessage interface function store(overrides: { id: string; @@ -527,3 +531,163 @@ describe('registered group isMain', () => { expect(group.isMain).toBeUndefined(); }); }); + +describe('tool_usage', () => { + it('recordToolUsage inserts rows', async () => { + vi.useFakeTimers(); + vi.setSystemTime(new Date('2026-04-19T00:00:00.000Z')); + + await db.recordToolUsage({ + groupJid: 'group@g.us', + 
toolName: 'Bash', + success: true, + errorMessage: undefined, + durationMs: 120, + }); + + const rows = ( + db as unknown as { + db: { + prepare: (sql: string) => { + all: () => Array<{ + group_jid: string; + session_id: string | null; + tool_name: string; + success: number; + error_message: string | null; + duration_ms: number; + ts: string; + }>; + }; + }; + } + ).db + .prepare( + ` + SELECT group_jid, session_id, tool_name, success, error_message, duration_ms, ts + FROM tool_usage + `, + ) + .all(); + + expect(rows).toEqual([ + { + group_jid: 'group@g.us', + session_id: null, + tool_name: 'Bash', + success: 1, + error_message: null, + duration_ms: 120, + ts: '2026-04-19T00:00:00.000Z', + }, + ]); + }); + + it('getToolUsageSummary returns correct aggregates', async () => { + vi.useFakeTimers(); + + vi.setSystemTime(new Date('2026-04-19T00:00:00.000Z')); + await db.recordToolUsage({ + groupJid: 'group@g.us', + toolName: 'Bash', + success: true, + durationMs: 100, + }); + vi.setSystemTime(new Date('2026-04-19T00:01:00.000Z')); + await db.recordToolUsage({ + groupJid: 'group@g.us', + toolName: 'Bash', + success: true, + durationMs: 110, + }); + vi.setSystemTime(new Date('2026-04-19T00:02:00.000Z')); + await db.recordToolUsage({ + groupJid: 'group@g.us', + toolName: 'Bash', + success: true, + durationMs: 120, + }); + vi.setSystemTime(new Date('2026-04-19T00:03:00.000Z')); + await db.recordToolUsage({ + groupJid: 'group@g.us', + toolName: 'Bash', + success: false, + errorMessage: 'boom', + durationMs: 130, + }); + + await expect(db.getToolUsageSummary()).resolves.toEqual([ + { + toolName: 'Bash', + callCount: 4, + successCount: 3, + successRate: 0.75, + avgDurationMs: 115, + }, + ]); + }); + + it('applies since filter to exclude older rows', async () => { + vi.useFakeTimers(); + + vi.setSystemTime(new Date('2026-04-18T23:00:00.000Z')); + await db.recordToolUsage({ + groupJid: 'group@g.us', + toolName: 'Read', + success: true, + durationMs: 50, + }); + 
/**
 * Inserts one completed tool invocation into the `tool_usage` table.
 *
 * The row's `ts` column is set to the current wall-clock time as an ISO-8601
 * string (`Date#toISOString`), not to a caller-supplied timestamp.
 *
 * @param entry - Outcome of a single tool call. `sessionId` and
 *   `errorMessage` are stored as NULL when omitted; `success` is stored as
 *   1 (true) or 0 (false).
 * @returns A promise that resolves once the insert statement has run.
 */
async recordToolUsage(entry: {
  groupJid: string;
  sessionId?: string;
  toolName: string;
  success: boolean;
  errorMessage?: string;
  durationMs: number;
}): Promise<void> {
  this.db
    .prepare(
      `
      INSERT INTO tool_usage (group_jid, session_id, tool_name, success, error_message, duration_ms, ts)
      VALUES (?, ?, ?, ?, ?, ?, ?)
    `,
    )
    .run(
      entry.groupJid,
      entry.sessionId ?? null,
      entry.toolName,
      entry.success ? 1 : 0,
      entry.errorMessage ?? null,
      entry.durationMs,
      new Date().toISOString(),
    );
}

/**
 * Aggregates `tool_usage` rows per tool name.
 *
 * Both filters are optional; an omitted filter is bound as NULL, which the
 * query treats as "no restriction".
 *
 * @param opts - Optional filters.
 *   `since` keeps rows whose stored `ts` is >= `since.toISOString()` (the
 *   comparison is done on the ISO strings). `toolName` keeps rows for exactly
 *   that tool.
 * @returns One row per tool with its call count, number of successful calls,
 *   success rate (`successCount / callCount`) and average `duration_ms`,
 *   ordered by call count descending, then tool name ascending. Empty when no
 *   rows match.
 */
async getToolUsageSummary(opts?: {
  since?: Date;
  toolName?: string;
}): Promise<
  Array<{
    toolName: string;
    callCount: number;
    successCount: number;
    successRate: number;
    avgDurationMs: number;
  }>
> {
  return this.db
    .prepare(
      `
      SELECT
        tool_name AS toolName,
        COUNT(*) AS callCount,
        COALESCE(SUM(success), 0) AS successCount,
        CAST(COALESCE(SUM(success), 0) AS REAL) / COUNT(*) AS successRate,
        AVG(duration_ms) AS avgDurationMs
      FROM tool_usage
      WHERE (:since IS NULL OR ts >= :since)
        AND (:toolName IS NULL OR tool_name = :toolName)
      GROUP BY tool_name
      ORDER BY callCount DESC, toolName ASC
    `,
    )
    .all({
      // Named parameters: NULL disables the corresponding WHERE clause.
      since: opts?.since?.toISOString() ?? null,
      toolName: opts?.toolName ?? null,
    }) as Array<{
    toolName: string;
    callCount: number;
    successCount: number;
    successRate: number;
    avgDurationMs: number;
  }>;
}
event', () => { // Simulate what happens inside addChannel → factory(config) // The factory receives config.onMessage — calling it should trigger message.in const config = ( - agent as unknown as { _buildDriverConfig: () => { onMessage: Function } } + agent as unknown as { + _buildDriverConfig: () => { onMessage: OnMessageHandler }; + } )._buildDriverConfig(); // This is what a real ChannelDriver would call when it receives a message diff --git a/src/task-scheduler.ts b/src/task-scheduler.ts index 5a9e7aae6ef..bc331d9d972 100644 --- a/src/task-scheduler.ts +++ b/src/task-scheduler.ts @@ -1,6 +1,5 @@ import { CronExpressionParser } from 'cron-parser'; import fs from 'fs'; -import path from 'path'; import { ContainerEvent, diff --git a/src/tool-usage.test.ts b/src/tool-usage.test.ts new file mode 100644 index 00000000000..07c00177b2e --- /dev/null +++ b/src/tool-usage.test.ts @@ -0,0 +1,343 @@ +import fs from 'fs'; +import os from 'os'; +import path from 'path'; + +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +vi.mock('./container-runner.js', async () => { + const actual = await vi.importActual( + './container-runner.js', + ); + return { + ...actual, + runContainerAgent: vi.fn(), + }; +}); + +import { AgentImpl } from './agent/agent-impl.js'; +import { + buildAgentConfig, + resolveSerializableAgentSettings, +} from './agent/config.js'; +import { _initTestDatabase, AgentDb } from './db.js'; +import { buildRuntimeConfig } from './runtime-config.js'; +import { runContainerAgent } from './container-runner.js'; +import type { Channel, RegisteredGroup } from './types.js'; + +const runtimeConfig = buildRuntimeConfig( + { timezone: 'UTC' }, + '/tmp/agentlite-test-pkg', +); + +const MAIN_GROUP: RegisteredGroup = { + name: 'Main', + folder: 'main', + trigger: 'always', + added_at: '2024-01-01T00:00:00.000Z', + isMain: true, +}; + +let tmpDir: string; +let db: AgentDb; + +function createAgent(name: string): AgentImpl { + const config = 
buildAgentConfig({ + agentId: `${name}00000000`.slice(0, 8), + ...resolveSerializableAgentSettings( + name, + { workdir: path.join(tmpDir, 'agents', name) }, + tmpDir, + ), + }); + return new AgentImpl(config, runtimeConfig); +} + +function createMockChannel(): Channel { + return { + name: 'mock', + async connect(): Promise {}, + async disconnect(): Promise {}, + async sendMessage(): Promise {}, + isConnected(): boolean { + return true; + }, + ownsJid(jid: string): boolean { + return jid === 'mock:tool-usage'; + }, + async setTyping(): Promise {}, + }; +} + +function setupAgent(): AgentImpl { + const agent = createAgent('tool-usage'); + agent._setDbForTests(db); + agent._setRegisteredGroups({ 'mock:tool-usage': MAIN_GROUP }); + (agent as unknown as { _started: boolean })._started = true; + (agent as unknown as { channels: Map }).channels.set( + 'mock', + createMockChannel(), + ); + + db.storeChatMetadata( + 'mock:tool-usage', + '2026-04-19T00:00:00.000Z', + 'Tool Usage Chat', + ); + db.storeMessage({ + id: 'msg-1', + chat_jid: 'mock:tool-usage', + sender: 'user1', + sender_name: 'User 1', + content: 'run the tool', + timestamp: '2026-04-19T00:00:01.000Z', + is_from_me: false, + }); + + return agent; +} + +function sdkMsg(sdkType: string, message: unknown, sdkSubtype?: string) { + return { type: 'sdk_message' as const, sdkType, sdkSubtype, message }; +} + +describe('tool usage analytics', () => { + beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'agentlite-tool-usage-')); + db = _initTestDatabase(); + vi.mocked(runContainerAgent).mockReset(); + }); + + afterEach(() => { + vi.useRealTimers(); + try { + fs.rmSync(tmpDir, { recursive: true, force: true }); + } catch { + /* ignore */ + } + }); + + it('records successful tool results from tool_use/tool_result SDK messages', async () => { + const agent = setupAgent(); + vi.useFakeTimers(); + vi.setSystemTime(new Date('2026-04-19T00:00:00.000Z')); + + vi.mocked(runContainerAgent).mockImplementation( + 
async (_group, _input, _rc, _onProcess, onOutput) => { + await onOutput?.( + sdkMsg('assistant', { + uuid: 'a1', + message: { + content: [ + { + type: 'tool_use', + name: 'Bash', + id: 'tool-1', + input: { command: 'pwd' }, + }, + ], + }, + }), + ); + vi.advanceTimersByTime(25); + await onOutput?.( + sdkMsg('user', { + uuid: 'u1', + message: { + content: [ + { + type: 'tool_result', + tool_use_id: 'tool-1', + is_error: false, + content: 'ok', + }, + ], + }, + }), + ); + await onOutput?.({ + type: 'state', + state: 'stopped', + reason: 'exit', + exitCode: 0, + }); + return { status: 'success', result: null }; + }, + ); + + await agent.processGroupMessages('mock:tool-usage'); + + const rows = ( + db as unknown as { + db: { + prepare: (sql: string) => { + all: () => Array<{ + group_jid: string; + tool_name: string; + success: number; + error_message: string | null; + duration_ms: number; + }>; + }; + }; + } + ).db + .prepare( + ` + SELECT group_jid, tool_name, success, error_message, duration_ms + FROM tool_usage + `, + ) + .all(); + + expect(rows).toEqual([ + { + group_jid: 'mock:tool-usage', + tool_name: 'Bash', + success: 1, + error_message: null, + duration_ms: 25, + }, + ]); + + await expect(db.getToolUsageSummary()).resolves.toEqual([ + expect.objectContaining({ + toolName: 'Bash', + callCount: 1, + successCount: 1, + successRate: 1, + }), + ]); + }); + + it('records failed tool results as success_rate 0', async () => { + const agent = setupAgent(); + vi.useFakeTimers(); + vi.setSystemTime(new Date('2026-04-19T00:00:00.000Z')); + + vi.mocked(runContainerAgent).mockImplementation( + async (_group, _input, _rc, _onProcess, onOutput) => { + await onOutput?.( + sdkMsg('assistant', { + uuid: 'a1', + message: { + content: [ + { + type: 'tool_use', + name: 'Bash', + id: 'tool-2', + input: { command: 'false' }, + }, + ], + }, + }), + ); + vi.advanceTimersByTime(10); + await onOutput?.( + sdkMsg('user', { + uuid: 'u1', + message: { + content: [ + { + type: 'tool_result', 
+ tool_use_id: 'tool-2', + is_error: true, + content: 'command failed', + }, + ], + }, + }), + ); + await onOutput?.({ + type: 'state', + state: 'stopped', + reason: 'exit', + exitCode: 0, + }); + return { status: 'success', result: null }; + }, + ); + + await agent.processGroupMessages('mock:tool-usage'); + + await expect(db.getToolUsageSummary()).resolves.toEqual([ + expect.objectContaining({ + toolName: 'Bash', + callCount: 1, + successCount: 0, + successRate: 0, + }), + ]); + }); + + it('emits run.tool_alert when a tool falls below the hourly success threshold', async () => { + const agent = setupAgent(); + const alerts: Array> = []; + agent.on('run.tool_alert', (evt) => + alerts.push(evt as unknown as Record), + ); + vi.useFakeTimers(); + vi.setSystemTime(new Date('2026-04-19T00:00:00.000Z')); + + vi.mocked(runContainerAgent).mockImplementation( + async (_group, _input, _rc, _onProcess, onOutput) => { + await onOutput?.( + sdkMsg('assistant', { + uuid: 'a-1', + message: { + content: [ + { + type: 'tool_use', + name: 'Bash', + id: 'tool-1', + input: { command: 'step-1' }, + }, + ], + }, + }), + ); + vi.advanceTimersByTime(1); + await onOutput?.( + sdkMsg('user', { + uuid: 'u-1', + message: { + content: [ + { + type: 'tool_result', + tool_use_id: 'tool-1', + is_error: true, + content: 'failure 1', + }, + ], + }, + }), + ); + await onOutput?.({ + type: 'state', + state: 'stopped', + reason: 'exit', + exitCode: 0, + }); + return { status: 'success', result: null }; + }, + ); + + await agent.processGroupMessages('mock:tool-usage'); + + await expect(db.getToolUsageSummary()).resolves.toEqual([ + expect.objectContaining({ + toolName: 'Bash', + callCount: 1, + successCount: 0, + successRate: 0, + }), + ]); + expect(alerts).toHaveLength(1); + expect(alerts[0]).toMatchObject({ + toolName: 'Bash', + errorRate: 1, + callCount: 1, + windowHours: 1, + }); + expect(alerts[0]).not.toHaveProperty('agentId'); + }); +}); diff --git a/vitest.config.ts b/vitest.config.ts index 
a456d1cc3df..c2c5332812f 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -3,5 +3,6 @@ import { defineConfig } from 'vitest/config'; export default defineConfig({ test: { include: ['src/**/*.test.ts', 'setup/**/*.test.ts'], + testTimeout: 15000, }, });