diff --git a/.changeset/list-syntax-toolcontext.md b/.changeset/list-syntax-toolcontext.md new file mode 100644 index 000000000..0a4e0bfa0 --- /dev/null +++ b/.changeset/list-syntax-toolcontext.md @@ -0,0 +1,5 @@ +--- +"@livekit/agents": minor +--- + +**BREAKING**: `Agent({ tools })` and `agent.updateTools()` now accept a flat list `(FunctionTool | ProviderDefinedTool)[]` instead of a `Record<string, FunctionTool>` map, and `llm.tool({ ... })` requires a non-empty `name` field when creating a function tool (provider-defined tools are still identified by `id`). `ToolContext` is now a Python-parity class with `functionTools` / `providerTools` / `toolsets` / `tools` accessors, plus `flatten()`, `hasTool(name)`, `getFunctionTool(name)`, `updateTools()`, `copy()`, and `equals()`. To match the Python reference, registering two **different** function-tool instances under the same `name` now throws `duplicate function name: <name>` instead of silently overriding the earlier entry; passing the **same instance** twice is a no-op. `agent.toolCtx` returns a defensive copy so callers can no longer mutate the agent's internal state. `LLM.chat({ toolCtx })` accepts either a `ToolContext` instance or a raw `(FunctionTool | ProviderDefinedTool)[]` array (`ToolCtxInput`) and normalizes it internally, so callers don't have to construct a `ToolContext` themselves. Stateful `Toolset` containers are not part of this release — the `toolsets` accessor currently returns an empty list and `TODO`s in `tool_context.ts` mark every site where Python's Toolset support will plug in later. 
diff --git a/agents/src/beta/workflows/task_group.ts b/agents/src/beta/workflows/task_group.ts index 8c96790dd..add6655fb 100644 --- a/agents/src/beta/workflows/task_group.ts +++ b/agents/src/beta/workflows/task_group.ts @@ -84,10 +84,7 @@ export class TaskGroup extends AgentTask { const outOfScopeTool = this.buildOutOfScopeTool(taskId); if (outOfScopeTool) { - await this._currentTask.updateTools({ - ...this._currentTask.toolCtx, - out_of_scope: outOfScopeTool, - }); + await this._currentTask.updateTools([...this._currentTask.toolCtx.tools, outOfScopeTool]); } try { @@ -190,6 +187,7 @@ export class TaskGroup extends AgentTask { const visitedTasks = this._visitedTasks; return tool({ + name: 'out_of_scope', description, flags: ToolFlag.IGNORE_ON_ENTER, parameters: z.object({ diff --git a/agents/src/inference/llm.ts b/agents/src/inference/llm.ts index 3434e496c..87679389f 100644 --- a/agents/src/inference/llm.ts +++ b/agents/src/inference/llm.ts @@ -249,7 +249,7 @@ export class LLM extends llm.LLM { chat({ chatCtx, - toolCtx, + toolCtx: toolCtxInput, connOptions = DEFAULT_API_CONNECT_OPTIONS, parallelToolCalls, toolChoice, @@ -258,7 +258,7 @@ export class LLM extends llm.LLM { extraKwargs, }: { chatCtx: llm.ChatContext; - toolCtx?: llm.ToolContext; + toolCtx?: llm.ToolCtxInput; connOptions?: APIConnectOptions; parallelToolCalls?: boolean; toolChoice?: llm.ToolChoice; @@ -266,6 +266,7 @@ export class LLM extends llm.LLM { // TODO(AJS-270): Add responseFormat parameter extraKwargs?: Record; }): LLMStream { + const toolCtx = llm.toToolContext(toolCtxInput); let modelOptions: Record = { ...(extraKwargs || {}) }; parallelToolCalls = @@ -273,7 +274,11 @@ export class LLM extends llm.LLM { ? 
parallelToolCalls : this.opts.modelOptions.parallel_tool_calls; - if (toolCtx && Object.keys(toolCtx).length > 0 && parallelToolCalls !== undefined) { + if ( + toolCtx && + Object.keys(toolCtx.functionTools).length > 0 && + parallelToolCalls !== undefined + ) { modelOptions.parallel_tool_calls = parallelToolCalls; } @@ -379,26 +384,32 @@ export class LLMStream extends llm.LLMStream { )) as OpenAI.ChatCompletionMessageParam[]; const tools = this.toolCtx - ? Object.entries(this.toolCtx).map(([name, func]) => { - const oaiParams = { - type: 'function' as const, - function: { - name, - description: func.description, - parameters: llm.toJsonSchema( - func.parameters, - true, - this.strictToolSchema, - ) as unknown as OpenAI.Chat.Completions.ChatCompletionFunctionTool['function']['parameters'], - } as OpenAI.Chat.Completions.ChatCompletionFunctionTool['function'], - }; - - if (this.strictToolSchema) { - oaiParams.function.strict = true; - } - - return oaiParams; - }) + ? this.toolCtx + .flatten() + .map((t) => { + if (llm.isFunctionTool(t)) { + const oaiParams = { + type: 'function' as const, + function: { + name: t.name, + description: t.description, + parameters: llm.toJsonSchema( + t.parameters, + true, + this.strictToolSchema, + ) as unknown as OpenAI.Chat.Completions.ChatCompletionFunctionTool['function']['parameters'], + } as OpenAI.Chat.Completions.ChatCompletionFunctionTool['function'], + }; + if (this.strictToolSchema) { + oaiParams.function.strict = true; + } + return oaiParams; + } + // Provider-defined tools are not yet supported by the inference adapter; skip them + // here rather than emitting a malformed tool definition. See AJS-112. 
+ return undefined; + }) + .filter((t): t is NonNullable => t !== undefined) : undefined; const requestOptions: Record = dropUnsupportedParams( diff --git a/agents/src/llm/chat_context.test.ts b/agents/src/llm/chat_context.test.ts index 849469e55..350dcd1cd 100644 --- a/agents/src/llm/chat_context.test.ts +++ b/agents/src/llm/chat_context.test.ts @@ -19,6 +19,7 @@ import { isInstructions, renderInstructions, } from './chat_context.js'; +import { ToolContext, tool } from './tool_context.js'; initializeLogger({ pretty: false, level: 'error' }); @@ -1479,3 +1480,31 @@ extra`; expect((baseCtx.items[0]! as ChatMessage).content[0]).toBe(instr); }); }); + +describe('ChatContext.copy with toolCtx filter', () => { + it('drops function calls / outputs whose tool is not in the supplied ToolContext', () => { + const known = tool({ name: 'known', description: 'k', execute: async () => 'ok' }); + const ctx = new ChatContext([ + ChatMessage.create({ role: 'user', content: ['hello'] }), + FunctionCall.create({ callId: 'c1', name: 'known', args: '{}' }), + FunctionCallOutput.create({ callId: 'c1', name: 'known', output: 'done', isError: false }), + FunctionCall.create({ callId: 'c2', name: 'removed', args: '{}' }), + FunctionCallOutput.create({ callId: 'c2', name: 'removed', output: 'x', isError: false }), + ]); + + const filtered = ctx.copy({ toolCtx: new ToolContext([known]) }); + const types = filtered.items.map((i) => `${i.type}:${'name' in i ? 
i.name : ''}`); + expect(types).toEqual(['message:', 'function_call:known', 'function_call_output:known']); + }); + + it('keeps provider-tool calls when the ToolContext holds a matching provider tool id', () => { + const provider = tool({ id: 'code_runner', config: {} }); + const ctx = new ChatContext([ + FunctionCall.create({ callId: 'p1', name: 'code_runner', args: '{}' }), + FunctionCall.create({ callId: 'p2', name: 'other', args: '{}' }), + ]); + + const filtered = ctx.copy({ toolCtx: new ToolContext([provider]) }); + expect(filtered.items.map((i) => ('name' in i ? i.name : ''))).toEqual(['code_runner']); + }); +}); diff --git a/agents/src/llm/chat_context.ts b/agents/src/llm/chat_context.ts index 743e7efb8..34cf39200 100644 --- a/agents/src/llm/chat_context.ts +++ b/agents/src/llm/chat_context.ts @@ -835,7 +835,7 @@ export class ChatContext { continue; } - if (toolCtx !== undefined && isToolCallOrOutput(item) && toolCtx[item.name] === undefined) { + if (toolCtx !== undefined && isToolCallOrOutput(item) && !toolCtx.hasTool(item.name)) { continue; } diff --git a/agents/src/llm/fallback_adapter.test.ts b/agents/src/llm/fallback_adapter.test.ts index a9747c885..d466c9306 100644 --- a/agents/src/llm/fallback_adapter.test.ts +++ b/agents/src/llm/fallback_adapter.test.ts @@ -9,7 +9,7 @@ import { delay } from '../utils.js'; import type { ChatContext } from './chat_context.js'; import { FallbackAdapter } from './fallback_adapter.js'; import { type ChatChunk, LLM, LLMStream } from './llm.js'; -import type { ToolChoice, ToolContext } from './tool_context.js'; +import type { ToolChoice, ToolCtxInput } from './tool_context.js'; class MockLLMStream extends LLMStream { public myLLM: LLM; @@ -18,7 +18,7 @@ class MockLLMStream extends LLMStream { llm: LLM, opts: { chatCtx: ChatContext; - toolCtx?: ToolContext; + toolCtx?: ToolCtxInput; connOptions: APIConnectOptions; }, private shouldFail: boolean = false, @@ -64,7 +64,7 @@ class MockLLM extends LLM { chat(opts: { chatCtx: 
ChatContext; - toolCtx?: ToolContext; + toolCtx?: ToolCtxInput; connOptions?: APIConnectOptions; parallelToolCalls?: boolean; toolChoice?: ToolChoice; diff --git a/agents/src/llm/fallback_adapter.ts b/agents/src/llm/fallback_adapter.ts index 128c2392c..27d87d2a0 100644 --- a/agents/src/llm/fallback_adapter.ts +++ b/agents/src/llm/fallback_adapter.ts @@ -8,7 +8,7 @@ import { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js import type { ChatContext } from './chat_context.js'; import type { ChatChunk } from './llm.js'; import { LLM, LLMStream } from './llm.js'; -import type { ToolChoice, ToolContext } from './tool_context.js'; +import type { ToolChoice, ToolCtxInput } from './tool_context.js'; /** * Default connection options for FallbackAdapter. @@ -113,7 +113,7 @@ export class FallbackAdapter extends LLM { chat(opts: { chatCtx: ChatContext; - toolCtx?: ToolContext; + toolCtx?: ToolCtxInput; connOptions?: APIConnectOptions; parallelToolCalls?: boolean; toolChoice?: ToolChoice; @@ -159,7 +159,7 @@ class FallbackLLMStream extends LLMStream { adapter: FallbackAdapter, opts: { chatCtx: ChatContext; - toolCtx?: ToolContext; + toolCtx?: ToolCtxInput; connOptions: APIConnectOptions; parallelToolCalls?: boolean; toolChoice?: ToolChoice; diff --git a/agents/src/llm/index.ts b/agents/src/llm/index.ts index 950296c9c..4837f2cb9 100644 --- a/agents/src/llm/index.ts +++ b/agents/src/llm/index.ts @@ -4,15 +4,20 @@ export { handoff, isFunctionTool, + isProviderDefinedTool, + isTool, tool, + ToolContext, ToolError, ToolFlag, + toToolContext, type AgentHandoff, type FunctionTool, type ProviderDefinedTool, type Tool, type ToolChoice, - type ToolContext, + type ToolContextEntry, + type ToolCtxInput, type ToolOptions, type ToolType, } from './tool_context.js'; diff --git a/agents/src/llm/llm.ts b/agents/src/llm/llm.ts index 553541939..0c05bbb2d 100644 --- a/agents/src/llm/llm.ts +++ b/agents/src/llm/llm.ts @@ -11,7 +11,12 @@ import { recordException, traceTypes, 
tracer } from '../telemetry/index.js'; import { type APIConnectOptions, intervalForRetry } from '../types.js'; import { AsyncIterableQueue, delay, startSoon, toError } from '../utils.js'; import { type ChatContext, type ChatRole, type FunctionCall } from './chat_context.js'; -import type { ToolChoice, ToolContext } from './tool_context.js'; +import { + type ToolChoice, + type ToolContext, + type ToolCtxInput, + toToolContext, +} from './tool_context.js'; export interface ChoiceDelta { role: ChatRole; @@ -91,7 +96,12 @@ export abstract class LLM extends (EventEmitter as new () => TypedEmitter { connOptions, }: { chatCtx: ChatContext; - toolCtx?: ToolContext; + toolCtx?: ToolCtxInput; connOptions: APIConnectOptions; }, ) { this.#llm = llm; this.#chatCtx = chatCtx; - this.#toolCtx = toolCtx; + this.#toolCtx = toToolContext(toolCtx); this._connOptions = connOptions; this.monitorMetrics(); this.abortController.signal.addEventListener('abort', () => { diff --git a/agents/src/llm/tool_context.test.ts b/agents/src/llm/tool_context.test.ts index 183c7ff2a..4d9cdb83d 100644 --- a/agents/src/llm/tool_context.test.ts +++ b/agents/src/llm/tool_context.test.ts @@ -5,7 +5,7 @@ import { describe, expect, it } from 'vitest'; import { z } from 'zod'; import * as z3 from 'zod/v3'; import * as z4 from 'zod/v4'; -import { type ToolOptions, tool } from './tool_context.js'; +import { ToolContext, type ToolOptions, tool } from './tool_context.js'; import { createToolOptions, oaiParams } from './utils.js'; describe('Tool Context', () => { @@ -77,6 +77,7 @@ describe('Tool Context', () => { describe('tool', () => { it('should create and execute a basic core tool', async () => { const getWeather = tool({ + name: 'getWeather', description: 'Get the weather for a given location', parameters: z.object({ location: z.string(), @@ -95,6 +96,7 @@ describe('Tool Context', () => { it('should properly type a callable function', async () => { const testFunction = tool({ + name: 'testFunction', 
description: 'Test function', parameters: z.object({ name: z.string().describe('The user name'), @@ -114,6 +116,7 @@ describe('Tool Context', () => { it('should handle async execution', async () => { const testFunction = tool({ + name: 'asyncTestFunction', description: 'Async test function', parameters: z.object({ delay: z.number().describe('Delay in milliseconds'), @@ -157,6 +160,7 @@ describe('Tool Context', () => { describe('optional parameters', () => { it('should create a tool without parameters', async () => { const simpleAction = tool({ + name: 'simpleAction', description: 'Perform a simple action', execute: async () => { return 'Action performed'; @@ -175,6 +179,7 @@ describe('Tool Context', () => { it('should support .optional() fields in tool parameters', async () => { const weatherTool = tool({ + name: 'weatherTool', description: 'Get weather information', parameters: z.object({ location: z.string().describe('The city or location').optional(), @@ -205,6 +210,7 @@ describe('Tool Context', () => { it('should handle tools with context but no parameters', async () => { const greetUser = tool({ + name: 'greetUser', description: 'Greet the current user', execute: async (_, { ctx }: ToolOptions<{ username: string }>) => { return `Hello, ${ctx.userData.username}!`; @@ -217,6 +223,7 @@ describe('Tool Context', () => { it('should create a tool that accesses tool call id without parameters', async () => { const getCallId = tool({ + name: 'getCallId', description: 'Get the current tool call ID', execute: async (_, { toolCallId }) => { return `Tool call ID: ${toolCallId}`; @@ -231,6 +238,7 @@ describe('Tool Context', () => { describe('Zod v3 and v4 compatibility', () => { it('should work with Zod v3 schemas', async () => { const v3Tool = tool({ + name: 'v3Tool', description: 'A tool using Zod v3 schema', parameters: z3.object({ name: z3.string(), @@ -250,6 +258,7 @@ describe('Tool Context', () => { it('should work with Zod v4 schemas', async () => { const v4Tool = 
tool({ + name: 'v4Tool', description: 'A tool using Zod v4 schema', parameters: z4.object({ name: z4.string(), @@ -269,6 +278,7 @@ describe('Tool Context', () => { it('should handle v4 schemas with optional fields', async () => { const v4Tool = tool({ + name: 'v4OptionalTool', description: 'Tool with optional field using v4', parameters: z4.object({ required: z4.string(), @@ -291,6 +301,7 @@ describe('Tool Context', () => { it('should handle v4 enum schemas', async () => { const v4Tool = tool({ + name: 'v4EnumTool', description: 'Tool with enum using v4', parameters: z4.object({ color: z4.enum(['red', 'blue', 'green']), @@ -306,6 +317,7 @@ describe('Tool Context', () => { it('should handle v4 array schemas', async () => { const v4Tool = tool({ + name: 'v4ArrayTool', description: 'Tool with array using v4', parameters: z4.object({ tags: z4.array(z4.string()), @@ -324,6 +336,7 @@ describe('Tool Context', () => { it('should handle v4 nested object schemas', async () => { const v4Tool = tool({ + name: 'v4NestedTool', description: 'Tool with nested object using v4', parameters: z4.object({ user: z4.object({ @@ -405,3 +418,165 @@ describe('Tool Context', () => { }); }); }); + +describe('tool() name requirement', () => { + it('throws when name is missing', () => { + expect(() => + // @ts-expect-error - name is required + tool({ + description: 'no name', + execute: async () => 'x', + }), + ).toThrow('requires a non-empty name'); + }); + + it('throws when name is empty', () => { + expect(() => + tool({ + name: '', + description: 'empty name', + execute: async () => 'x', + }), + ).toThrow('requires a non-empty name'); + }); + + it('stores the name on the returned function tool', () => { + const t = tool({ + name: 'doStuff', + description: 'd', + execute: async () => 'x', + }); + expect(t.name).toBe('doStuff'); + }); +}); + +describe('ToolContext', () => { + const makeFn = (name: string) => + tool({ + name, + description: `${name} tool`, + execute: async () => name, + }); + + 
it('empty() returns an empty context', () => { + const ctx = ToolContext.empty(); + expect(ctx.functionTools).toEqual({}); + expect(ctx.providerTools).toEqual([]); + expect(ctx.toolsets).toEqual([]); + expect(ctx.flatten()).toEqual([]); + }); + + it('indexes function tools by name and supports lookup', () => { + const a = makeFn('a'); + const b = makeFn('b'); + const ctx = new ToolContext([a, b]); + + expect(ctx.functionTools).toEqual({ a, b }); + expect(ctx.getFunctionTool('a')).toBe(a); + expect(ctx.getFunctionTool('b')).toBe(b); + expect(ctx.getFunctionTool('missing')).toBeUndefined(); + }); + + it('throws on duplicate function names with different instances', () => { + // Matches Python's `if existing is not tool: raise ValueError(...)` — silently overriding + // a registered tool would mask a real bug at the caller (two distinct functions colliding + // on a single advertised name). + const a1 = makeFn('a'); + const a2 = makeFn('a'); + expect(() => new ToolContext([a1, a2])).toThrow('duplicate function name: a'); + }); + + it('silently skips the same function tool instance listed multiple times', () => { + // Matches Python's `return # same instance, skip` branch. Useful when a tool gets + // included both directly and via a future Toolset that re-exports it. 
+ const a = makeFn('a'); + const ctx = new ToolContext([a, a]); + expect(ctx.getFunctionTool('a')).toBe(a); + expect(Object.keys(ctx.functionTools)).toEqual(['a']); + }); + + it('separates provider tools from function tools', () => { + const fnA = makeFn('a'); + const provider = tool({ id: 'code', config: { language: 'python' } }); + const ctx = new ToolContext([fnA, provider]); + + expect(ctx.functionTools).toEqual({ a: fnA }); + expect(ctx.providerTools).toEqual([provider]); + expect(ctx.flatten()).toEqual([fnA, provider]); + }); + + it('updateTools replaces the entire context', () => { + const a = makeFn('a'); + const b = makeFn('b'); + const ctx = new ToolContext([a]); + ctx.updateTools([b]); + expect(ctx.getFunctionTool('a')).toBeUndefined(); + expect(ctx.getFunctionTool('b')).toBe(b); + }); + + it('copy() yields an independent context with the same tools', () => { + const a = makeFn('a'); + const ctx = new ToolContext([a]); + const dup = ctx.copy(); + + expect(dup.getFunctionTool('a')).toBe(a); + dup.updateTools([]); + expect(ctx.getFunctionTool('a')).toBe(a); + expect(dup.getFunctionTool('a')).toBeUndefined(); + }); + + it('equals() compares function tool maps and provider lists by identity', () => { + const a = makeFn('a'); + const b = makeFn('b'); + const c = makeFn('c'); + + expect(new ToolContext([a, b]).equals(new ToolContext([a, b]))).toBe(true); + expect(new ToolContext([a, b]).equals(new ToolContext([a]))).toBe(false); + expect(new ToolContext([a, b]).equals(new ToolContext([a, c]))).toBe(false); + }); + + it('equals() is reflexive', () => { + const a = makeFn('a'); + const provider = tool({ id: 'code', config: { language: 'python' } }); + const ctx = new ToolContext([a, provider]); + expect(ctx.equals(ctx)).toBe(true); + }); + + it('equals() treats provider tool order as insignificant', () => { + // Matches Python's `set(id(t) for t in self._provider_tools)` comparison: two contexts + // that hold the same provider-tool identities in different order 
are still equal so + // realtime-session / preemptive-generation reuse fast paths are not invalidated. + const a = makeFn('a'); + const p1 = tool({ id: 'code', config: { language: 'python' } }); + const p2 = tool({ id: 'browser', config: {} }); + expect(new ToolContext([a, p1, p2]).equals(new ToolContext([a, p2, p1]))).toBe(true); + }); + + it('equals() supports contexts with only provider tools', () => { + const p1 = tool({ id: 'code', config: {} }); + const p2 = tool({ id: 'browser', config: {} }); + expect(new ToolContext([p1, p2]).equals(new ToolContext([p1, p2]))).toBe(true); + const p3 = tool({ id: 'code', config: {} }); // distinct identity, same id + expect(new ToolContext([p1]).equals(new ToolContext([p3]))).toBe(false); + }); + + it('hasTool() matches function tools by name and provider tools by id', () => { + const a = makeFn('a'); + const provider = tool({ id: 'code_runner', config: {} }); + const ctx = new ToolContext([a, provider]); + + expect(ctx.hasTool('a')).toBe(true); + expect(ctx.hasTool('code_runner')).toBe(true); + expect(ctx.hasTool('missing')).toBe(false); + }); + + it('flatten() returns function tools in insertion order followed by provider tools', () => { + // Matches Python's `flatten()`: list(self._fnc_tools_map.values()) + self._provider_tools. + const a = makeFn('a'); + const b = makeFn('b'); + const provider = tool({ id: 'code', config: {} }); + const ctx = new ToolContext([b, provider, a]); + + expect(ctx.flatten()).toEqual([b, a, provider]); + }); +}); diff --git a/agents/src/llm/tool_context.ts b/agents/src/llm/tool_context.ts index ca2888167..df714d57a 100644 --- a/agents/src/llm/tool_context.ts +++ b/agents/src/llm/tool_context.ts @@ -167,6 +167,12 @@ export interface FunctionTool< > extends Tool { type: 'function'; + /** + * The name of the tool. Used to identify it inside a `ToolContext` and exposed to the LLM + * as the function name to call. + */ + name: string; + /** * The description of the tool. 
Will be used by the language model to decide whether to use the tool. */ @@ -190,38 +196,168 @@ export interface FunctionTool< [FUNCTION_TOOL_SYMBOL]: true; } -// TODO(AJS-112): support provider-defined tools in the future) -export type ToolContext = { - // eslint-disable-next-line @typescript-eslint/no-explicit-any -- Generic tool registry needs to accept any parameter/result types - [name: string]: FunctionTool; -}; +/** + * Convenience input shape accepted by APIs that want to take a list of tools directly without + * forcing callers to wrap them in `new ToolContext(...)`. + */ +export type ToolCtxInput = + | ToolContext + | readonly ToolContextEntry[]; + +export function toToolContext( + input: ToolCtxInput, +): ToolContext; +export function toToolContext( + input: ToolCtxInput | undefined, +): ToolContext | undefined; +export function toToolContext( + input: ToolCtxInput | undefined, +): ToolContext | undefined { + if (input === undefined) return undefined; + return input instanceof ToolContext ? input : new ToolContext(input); +} -export function isSameToolContext(ctx1: ToolContext, ctx2: ToolContext): boolean { - const toolNames = new Set(Object.keys(ctx1)); - const toolNames2 = new Set(Object.keys(ctx2)); +// TODO: toolset - accept stateful `Toolset` containers alongside `FunctionTool` / `ProviderDefinedTool`. +// eslint-disable-next-line @typescript-eslint/no-explicit-any -- ToolContext entries accept any function-tool parameter/result types +export type ToolContextEntry = + // eslint-disable-next-line @typescript-eslint/no-explicit-any + FunctionTool | ProviderDefinedTool; + +export class ToolContext { + // TODO: toolset - widen entries to `FunctionTool | ProviderDefinedTool | Toolset` once Toolset + // lands so this stays heterogeneous like Python's `Sequence[Tool | Toolset]`. 
+ private _tools: ToolContextEntry[] = []; + // eslint-disable-next-line @typescript-eslint/no-explicit-any -- ToolContext stores generic function tools + private _functionToolsMap: Map> = new Map(); + private _providerTools: ProviderDefinedTool[] = []; + // TODO: toolset - populate once Toolset support lands. The field is declared up front + // so the `toolsets` getter and the `equals` toolset-identity check already have a backing store. + private _toolSets: unknown[] = []; + + // TODO: toolset - widen `tools` to `Sequence` once Toolset lands. + constructor(tools: readonly ToolContextEntry[] = []) { + this.updateTools(tools); + } - if (toolNames.size !== toolNames2.size) { - return false; + static empty(): ToolContext { + return new ToolContext([]); } - for (const name of toolNames) { - if (!toolNames2.has(name)) { - return false; + /** A copy of all function tools in the tool context, including those in tool sets. */ + // eslint-disable-next-line @typescript-eslint/no-explicit-any + get functionTools(): Record> { + return Object.fromEntries(this._functionToolsMap); + } + + /** A copy of all provider tools in the tool context, including those in tool sets. */ + get providerTools(): ProviderDefinedTool[] { + return [...this._providerTools]; + } + + /** + * A copy of all tool sets in the tool context. + * + * TODO: toolset - wire up once Toolset is ported. + */ + get toolsets(): unknown[] { + return [...this._toolSets]; + } + + /** + * A copy of the raw tool list most recently passed to the constructor or `updateTools()`. + */ + get tools(): readonly ToolContextEntry[] { + return [...this._tools]; + } + + /** Flatten the tool context to a list of tools. 
*/ + flatten(): Tool[] { + return [...this._functionToolsMap.values(), ...this._providerTools]; + } + + // eslint-disable-next-line @typescript-eslint/no-explicit-any -- Generic registry over any parameter/result types + getFunctionTool(name: string): FunctionTool | undefined { + return this._functionToolsMap.get(name); + } + + hasTool(name: string): boolean { + if (this._functionToolsMap.has(name)) { + return true; } + return this._providerTools.some((tool) => tool.id === name); + } - const tool1 = ctx1[name]; - const tool2 = ctx2[name]; + // TODO: toolset - widen `tools` to `Sequence` once Toolset lands. + updateTools(tools: readonly ToolContextEntry[]): void { + this._tools = [...tools]; + this._functionToolsMap = new Map(); + this._providerTools = []; + this._toolSets = []; + + // Mirrors Python's recursive `add_tool` (minus Toolset flattening, which is TODO). + // eslint-disable-next-line @typescript-eslint/no-explicit-any -- accepts any tool shape + const addTool = (tool: any): void => { + if (isProviderDefinedTool(tool)) { + this._providerTools.push(tool); + return; + } + + if (isFunctionTool(tool)) { + const existing = this._functionToolsMap.get(tool.name); + if (existing !== undefined) { + if (existing !== tool) { + throw new Error(`duplicate function name: ${tool.name}`); + } + return; // same instance, skip + } + this._functionToolsMap.set(tool.name, tool); + return; + } + + // TODO: toolset - if (tool instanceof Toolset) { for (const t of tool.tools) addTool(t); + // this._toolSets.push(tool); return; } + + throw new Error(`unknown tool type: ${typeof tool}`); + }; - if (!tool1 || !tool2) { - return false; + // TODO: toolset - Python also chains `find_function_tools(self)` here so subclasses can + // declare tools as class members. JS doesn't use that decorator pattern, so we only walk + // the explicit input list. 
+ for (const tool of tools) { + addTool(tool); } + } - if (tool1.description !== tool2.description) { + copy(): ToolContext { + return new ToolContext([...this._tools]); + } + + equals(other: ToolContext): boolean { + if (this._functionToolsMap.size !== other._functionToolsMap.size) { + return false; + } + for (const [name, tool] of this._functionToolsMap) { + if (other._functionToolsMap.get(name) !== tool) { + return false; + } + } + // Provider tools compare as identity sets to match Python's `set(id(t) for t in ...)` + // semantics — neither order nor repeated instances are significant. + const otherProviderIds = new Set(other._providerTools); + if (new Set(this._providerTools).size !== otherProviderIds.size) { return false; } + for (const tool of this._providerTools) { + if (!otherProviderIds.has(tool)) { + return false; + } + } + // TODO: toolset - once Toolset lands, also compare `_toolSets` as identity sets per Python + // self_tool_set_ids = {id(ts) for ts in self._tool_sets} + // other_tool_set_ids = {id(ts) for ts in other._tool_sets} + // if self_tool_set_ids != other_tool_set_ids: return False + return true; } - - return true; } export function isSameToolChoice(choice1: ToolChoice | null, choice2: ToolChoice | null): boolean { @@ -248,11 +384,13 @@ export function tool< UserData = UnknownUserData, Result = unknown, >({ + name, description, parameters, execute, flags, }: { + name: string; description: string; parameters: Schema; execute: ToolExecuteFunction, UserData, Result>; @@ -263,10 +401,12 @@ export function tool< * Create a function tool without parameters. 
*/ export function tool({ + name, description, execute, flags, }: { + name: string; description: string; parameters?: never; execute: ToolExecuteFunction, UserData, Result>; @@ -290,6 +430,10 @@ export function tool({ // eslint-disable-next-line @typescript-eslint/no-explicit-any export function tool(tool: any): any { if (tool.execute !== undefined) { + if (typeof tool.name !== 'string' || tool.name.length === 0) { + throw new Error('tool({ name, ... }) requires a non-empty name'); + } + // Default parameters to z.object({}) if not provided const parameters = tool.parameters ?? z.object({}); @@ -305,6 +449,7 @@ export function tool(tool: any): any { return { type: 'function', + name: tool.name, description: tool.description, parameters, execute: tool.execute, diff --git a/agents/src/llm/tool_context.type.test.ts b/agents/src/llm/tool_context.type.test.ts index 27cf9fe55..187f95e7b 100644 --- a/agents/src/llm/tool_context.type.test.ts +++ b/agents/src/llm/tool_context.type.test.ts @@ -8,6 +8,7 @@ import { type FunctionTool, type ProviderDefinedTool, type ToolOptions, tool } f describe('tool type inference', () => { it('should infer argument type from zod schema', () => { const toolType = tool({ + name: 'test', description: 'test', parameters: z.object({ number: z.number() }), execute: async () => 'test' as const, @@ -29,6 +30,7 @@ describe('tool type inference', () => { it('should infer run context type', () => { const toolType = tool({ + name: 'test', description: 'test', parameters: z.object({ number: z.number() }), execute: async ({ number }, { ctx }: ToolOptions<{ name: string }>) => { @@ -91,6 +93,7 @@ describe('tool type inference', () => { it('should infer empty object type when parameters are omitted', () => { const toolType = tool({ + name: 'simpleAction', description: 'Simple action without parameters', execute: async () => 'done' as const, }); @@ -100,6 +103,7 @@ describe('tool type inference', () => { it('should infer correct types with context but no 
parameters', () => { const toolType = tool({ + name: 'actionWithCtx', description: 'Action with context', execute: async (args, { ctx }: ToolOptions<{ userId: number }>) => { expectTypeOf(args).toEqualTypeOf>(); diff --git a/agents/src/llm/utils.ts b/agents/src/llm/utils.ts index 0271deb2a..e8e2c4cab 100644 --- a/agents/src/llm/utils.ts +++ b/agents/src/llm/utils.ts @@ -171,7 +171,7 @@ export const oaiBuildFunctionInfo = ( toolName: string, rawArgs: string, ): FunctionCall => { - const tool = toolCtx[toolName]; + const tool = toolCtx.getFunctionTool(toolName); if (!tool) { throw new Error(`AI tool ${toolName} not found`); } @@ -187,7 +187,7 @@ export async function executeToolCall( toolCall: FunctionCall, toolCtx: ToolContext, ): Promise { - const tool = toolCtx[toolCall.name]!; + const tool = toolCtx.getFunctionTool(toolCall.name)!; let args: object | undefined; let params: object | undefined; diff --git a/agents/src/voice/agent.test.ts b/agents/src/voice/agent.test.ts index 8dd83fee3..f2afa7c42 100644 --- a/agents/src/voice/agent.test.ts +++ b/agents/src/voice/agent.test.ts @@ -29,12 +29,14 @@ describe('Agent', () => { // Create mock tools using the tool function const mockTool1 = tool({ + name: 'getTool1', description: 'First test tool', parameters: z.object({}), execute: async () => 'tool1 result', }); const mockTool2 = tool({ + name: 'getTool2', description: 'Second test tool', parameters: z.object({ input: z.string().describe('Input parameter'), @@ -44,17 +46,14 @@ describe('Agent', () => { const agent = new Agent({ instructions, - tools: { - getTool1: mockTool1, - getTool2: mockTool2, - }, + tools: [mockTool1, mockTool2], }); expect(agent).toBeDefined(); expect(agent.instructions).toBe(instructions); // Assert tools are set correctly - const agentTools = agent.toolCtx; + const agentTools = agent.toolCtx.functionTools; expect(Object.keys(agentTools)).toHaveLength(2); expect(agentTools).toHaveProperty('getTool1'); expect(agentTools).toHaveProperty('getTool2'); 
@@ -64,27 +63,21 @@ describe('Agent', () => { expect(agentTools.getTool2?.description).toBe('Second test tool'); }); - it('should return a copy of tools, not the original reference', () => { + it('toolCtx returns a defensive copy that exposes the same tools', () => { const instructions = 'You are a helpful assistant'; const mockTool = tool({ + name: 'testTool', description: 'Test tool', parameters: z.object({}), execute: async () => 'result', }); - const tools = { testTool: mockTool }; - const agent = new Agent({ instructions, tools }); - - const tools1 = agent.toolCtx; - const tools2 = agent.toolCtx; - - // Should return different object references - expect(tools1).not.toBe(tools2); - expect(tools1).not.toBe(tools); + const agent = new Agent({ instructions, tools: [mockTool] }); - // Should contain the same set of tools - expect(tools1).toEqual(tools2); - expect(tools1).toEqual(tools); + // Each call returns a fresh ToolContext so external mutation can't escape into the agent's + // internal state. 
+ expect(agent.toolCtx).not.toBe(agent.toolCtx); + expect(agent.toolCtx.getFunctionTool('testTool')).toBe(mockTool); }); it('should require AgentTask to run inside task context', async () => { diff --git a/agents/src/voice/agent.ts b/agents/src/voice/agent.ts index 890d7ea7d..3145de2b1 100644 --- a/agents/src/voice/agent.ts +++ b/agents/src/voice/agent.ts @@ -20,7 +20,8 @@ import { LLM, RealtimeModel, type ToolChoice, - type ToolContext, + ToolContext, + type ToolContextEntry, } from '../llm/index.js'; import { log } from '../log.js'; import type { STT, SpeechEvent } from '../stt/index.js'; @@ -119,7 +120,7 @@ export interface AgentOptions { id?: string; instructions: string | Instructions; chatCtx?: ChatContext; - tools?: ToolContext; + tools?: readonly ToolContextEntry[]; stt?: STT | STTModelString; vad?: VAD; llm?: LLM | RealtimeModel | LLMModels; @@ -157,7 +158,7 @@ export class Agent { _instructions: string | Instructions; /** @internal */ - _tools?: ToolContext; + _toolCtx: ToolContext; constructor({ id, @@ -190,10 +191,10 @@ export class Agent { } this._instructions = instructions; - this._tools = { ...tools }; + this._toolCtx = new ToolContext(tools ?? []); this._chatCtx = chatCtx ? chatCtx.copy({ - toolCtx: this._tools, + toolCtx: this._toolCtx, }) : ChatContext.empty(); @@ -269,7 +270,7 @@ export class Agent { } get toolCtx(): ToolContext { - return { ...this._tools }; + return this._toolCtx.copy(); } get session(): AgentSession { @@ -345,10 +346,10 @@ export class Agent { } // TODO(parity): Add when AgentConfigUpdate is ported to ChatContext. 
- async updateTools(tools: ToolContext): Promise { + async updateTools(tools: readonly ToolContextEntry[]): Promise { if (!this._agentActivity) { - this._tools = { ...tools }; - this._chatCtx = this._chatCtx.copy({ toolCtx: this._tools }); + this._toolCtx = new ToolContext(tools); + this._chatCtx = this._chatCtx.copy({ toolCtx: this._toolCtx }); return; } diff --git a/agents/src/voice/agent_activity.test.ts b/agents/src/voice/agent_activity.test.ts index 03ddd2dd4..186b4c904 100644 --- a/agents/src/voice/agent_activity.test.ts +++ b/agents/src/voice/agent_activity.test.ts @@ -18,6 +18,7 @@ import { Heap } from 'heap-js'; import { describe, expect, it, vi } from 'vitest'; import type { ChatContext } from '../llm/chat_context.js'; import { LLM, type LLMStream } from '../llm/llm.js'; +import { ToolContext } from '../llm/tool_context.js'; import { Future } from '../utils.js'; import { AgentActivity } from './agent_activity.js'; import type { PreemptiveGenerationInfo } from './audio_recognition.js'; @@ -270,15 +271,16 @@ function buildPreemptiveRunner(opts: Partial = {}) { const fakeChatCtx = { copy: () => fakeChatCtx } as unknown as ChatContext; + const emptyToolCtx = ToolContext.empty(); const fakeActivity = { _preemptiveGenerationCount: 0, _preemptiveGeneration: undefined, _currentSpeech: undefined as SpeechHandle | undefined, schedulingPaused: false, llm: new FakePreemptiveLLM(), - tools: {}, + tools: emptyToolCtx, toolChoice: null, - agent: { chatCtx: fakeChatCtx }, + agent: { chatCtx: fakeChatCtx, _toolCtx: emptyToolCtx }, agentSession: { sessionOptions: { turnHandling: { preemptiveGeneration: preemptiveOpts }, diff --git a/agents/src/voice/agent_activity.ts b/agents/src/voice/agent_activity.ts index e53f39b14..3068b408d 100644 --- a/agents/src/voice/agent_activity.ts +++ b/agents/src/voice/agent_activity.ts @@ -36,11 +36,12 @@ import { type RealtimeModelError, type RealtimeSession, type ToolChoice, - type ToolContext, + ToolContext, + type ToolContextEntry, 
ToolFlag, } from '../llm/index.js'; import type { LLMError } from '../llm/llm.js'; -import { isSameToolChoice, isSameToolContext } from '../llm/tool_context.js'; +import { isSameToolChoice } from '../llm/tool_context.js'; import { log } from '../log.js'; import type { EOUMetrics, @@ -484,7 +485,12 @@ export class AgentActivity implements RecognitionHooks { } } - const initialTools = Object.keys(this.tools); + // Surface every tool the agent advertises at start — function tools by name and provider + // tools by id. + const initialTools = [ + ...Object.keys(this.agent._toolCtx.functionTools), + ...this.agent._toolCtx.providerTools.map((t) => t.id), + ]; if (runOnEnter && (this.agent.instructions || initialTools.length > 0)) { const initialConfig = new AgentConfigUpdate({ instructions: this.agent.instructions, @@ -609,7 +615,8 @@ export class AgentActivity implements RecognitionHooks { // tools update is supported or tools are the same reusable = reusable && - (capabilities.midSessionToolsUpdate || isSameToolContext(this.tools, newActivity.tools)); + (capabilities.midSessionToolsUpdate || + this.agent._toolCtx.equals(newActivity.agent._toolCtx)); if (reusable) { // detach: remove event listeners but don't close the session @@ -759,13 +766,14 @@ export class AgentActivity implements RecognitionHooks { } } - async updateTools(tools: ToolContext): Promise { - const oldToolNames = new Set(Object.keys(this.tools)); - const newToolNames = new Set(Object.keys(tools)); + async updateTools(tools: readonly ToolContextEntry[]): Promise { + const oldToolNames = new Set(Object.keys(this.agent._toolCtx.functionTools)); + const newToolCtx = new ToolContext(tools); + const newToolNames = new Set(Object.keys(newToolCtx.functionTools)); const toolsAdded = [...newToolNames].filter((name) => !oldToolNames.has(name)); const toolsRemoved = [...oldToolNames].filter((name) => !newToolNames.has(name)); - this.agent._tools = { ...tools }; + this.agent._toolCtx = newToolCtx; if 
(toolsAdded.length > 0 || toolsRemoved.length > 0) { const configUpdate = new AgentConfigUpdate({ @@ -777,12 +785,12 @@ export class AgentActivity implements RecognitionHooks { } if (this.realtimeSession) { - await this.realtimeSession.updateTools(tools); + await this.realtimeSession.updateTools(newToolCtx); } if (this.llm instanceof LLM) { // for realtime LLM, we assume the server will remove unvalid tool messages - await this.updateChatCtx(this.agent._chatCtx.copy({ toolCtx: tools })); + await this.updateChatCtx(this.agent._chatCtx.copy({ toolCtx: newToolCtx })); } } @@ -1421,7 +1429,7 @@ export class AgentActivity implements RecognitionHooks { userMessage, info, chatCtx: chatCtx.copy(), - tools: { ...this.tools }, + tools: this.agent._toolCtx.copy(), toolChoice: this.toolChoice, createdAt: Date.now(), }; @@ -1725,11 +1733,14 @@ export class AgentActivity implements RecognitionHooks { const shouldFilterTools = onEnterData?.agent === this.agent && onEnterData?.session === this.agentSession; - const tools = shouldFilterTools - ? Object.fromEntries( - Object.entries(this.agent.toolCtx).filter( - ([, fnTool]) => !(fnTool.flags & ToolFlag.IGNORE_ON_ENTER), - ), + const tools: ToolContext = shouldFilterTools + ? 
new ToolContext( + this.agent.toolCtx.tools.filter((t) => { + if (t.type === 'function') { + return !(t.flags & ToolFlag.IGNORE_ON_ENTER); + } + return true; + }), ) : this.agent.toolCtx; @@ -1912,7 +1923,7 @@ export class AgentActivity implements RecognitionHooks { if ( preemptive.info.newTranscript === userMessage?.textContent && preemptive.chatCtx.isEquivalent(chatCtx) && - isSameToolContext(preemptive.tools, this.tools) && + preemptive.tools.equals(this.agent._toolCtx) && isSameToolChoice(preemptive.toolChoice, this.toolChoice) ) { speechHandle = preemptive.speechHandle; diff --git a/agents/src/voice/amd.test.ts b/agents/src/voice/amd.test.ts index 8f8acd822..197bee11a 100644 --- a/agents/src/voice/amd.test.ts +++ b/agents/src/voice/amd.test.ts @@ -7,7 +7,7 @@ import type { ChatContext } from '../llm/chat_context.js'; import { FunctionCall } from '../llm/chat_context.js'; import type { ChatChunk } from '../llm/llm.js'; import { LLM, type LLMStream } from '../llm/llm.js'; -import type { ToolChoice, ToolContext } from '../llm/tool_context.js'; +import type { ToolChoice, ToolCtxInput } from '../llm/tool_context.js'; import type { SpeechEvent, SpeechStream } from '../stt/stt.js'; import { STT } from '../stt/stt.js'; import type { APIConnectOptions } from '../types.js'; @@ -30,7 +30,7 @@ class StaticLLM extends LLM { connOptions: _connOptions, }: { chatCtx: ChatContext; - toolCtx?: ToolContext; + toolCtx?: ToolCtxInput; connOptions?: APIConnectOptions; parallelToolCalls?: boolean; toolChoice?: ToolChoice; @@ -194,7 +194,7 @@ describe('AMD', () => { } chat({}: { chatCtx: ChatContext; - toolCtx?: ToolContext; + toolCtx?: ToolCtxInput; connOptions?: APIConnectOptions; }): LLMStream { return { @@ -538,7 +538,7 @@ describe('AMD', () => { label(): string { return 'postpone-llm'; } - chat({}: { chatCtx: ChatContext; toolCtx?: ToolContext }): LLMStream { + chat({}: { chatCtx: ChatContext; toolCtx?: ToolCtxInput }): LLMStream { callCount += 1; const isFirst = callCount === 
1; return { diff --git a/agents/src/voice/amd.ts b/agents/src/voice/amd.ts index c1af28457..eeb1fd053 100644 --- a/agents/src/voice/amd.ts +++ b/agents/src/voice/amd.ts @@ -13,8 +13,7 @@ import type { LLMModels, STTModels } from '../inference/index.js'; import { ChatContext } from '../llm/chat_context.js'; import type { FunctionCall } from '../llm/chat_context.js'; import { LLM, type LLMStream } from '../llm/llm.js'; -import { isFunctionTool, tool } from '../llm/tool_context.js'; -import type { ToolContext } from '../llm/tool_context.js'; +import { ToolContext, type ToolContextEntry, isFunctionTool, tool } from '../llm/tool_context.js'; import { log } from '../log.js'; import { STT, SpeechEventType, type SpeechStream } from '../stt/stt.js'; import { traceTypes, tracer } from '../telemetry/index.js'; @@ -934,6 +933,7 @@ export class AMD extends (EventEmitter as new () => TypedEmitter) const isStale = (): boolean => generation !== this.detectGeneration || this.settled; const savePrediction = tool({ + name: 'save_prediction', description: 'Save the AMD prediction to the verdict.', parameters: z.object({ label: z.enum([ @@ -966,6 +966,7 @@ export class AMD extends (EventEmitter as new () => TypedEmitter) }); const postponeTermination = tool({ + name: 'postpone_termination', description: 'Postpone the termination of the classification task. 
' + 'Use when the transcript is ambiguous and more audio is expected.', @@ -996,10 +997,11 @@ export class AMD extends (EventEmitter as new () => TypedEmitter) }, }); - const toolCtx: ToolContext = { save_prediction: savePrediction }; + const toolList: ToolContextEntry[] = [savePrediction]; if (this.extensionCount < MAX_EXTENSIONS) { - toolCtx.postpone_termination = postponeTermination; + toolList.push(postponeTermination); } + const toolCtx = new ToolContext(toolList); const chatCtx = new ChatContext(); chatCtx.addMessage({ role: 'system', content: this.prompt }); @@ -1035,7 +1037,7 @@ export class AMD extends (EventEmitter as new () => TypedEmitter) // Execute tool calls (save_prediction populates `savedResult`, // postpone_termination mutates the silence timer and returns). for (const tc of toolCalls) { - const fnTool = toolCtx[tc.name]; + const fnTool = toolCtx.getFunctionTool(tc.name); if (!fnTool || !isFunctionTool(fnTool)) continue; let parsedArgs: unknown = {}; try { diff --git a/agents/src/voice/generation.ts b/agents/src/voice/generation.ts index 3d938abdc..8bce7c198 100644 --- a/agents/src/voice/generation.ts +++ b/agents/src/voice/generation.ts @@ -485,7 +485,10 @@ export function performLLMInference( traceTypes.ATTR_CHAT_CTX, JSON.stringify(chatCtx.toJSON({ excludeTimestamp: false })), ); - span.setAttribute(traceTypes.ATTR_FUNCTION_TOOLS, JSON.stringify(Object.keys(toolCtx))); + span.setAttribute( + traceTypes.ATTR_FUNCTION_TOOLS, + JSON.stringify(Object.keys(toolCtx.functionTools)), + ); if (model) { span.setAttribute(traceTypes.ATTR_GEN_AI_REQUEST_MODEL, model); @@ -992,7 +995,7 @@ export function performToolExecutions({ // TODO(brian): assert other toolChoice values - const tool = toolCtx[toolCall.name]; + const tool = toolCtx.getFunctionTool(toolCall.name); if (!tool) { logger.warn( { diff --git a/agents/src/voice/generation_tools.test.ts b/agents/src/voice/generation_tools.test.ts index d53e12196..9b4f7d1df 100644 --- 
a/agents/src/voice/generation_tools.test.ts +++ b/agents/src/voice/generation_tools.test.ts @@ -4,7 +4,7 @@ import { ReadableStream as NodeReadableStream } from 'stream/web'; import { describe, expect, it } from 'vitest'; import { z } from 'zod'; -import { FunctionCall, tool } from '../llm/index.js'; +import { FunctionCall, ToolContext, tool } from '../llm/index.js'; import { initializeLogger } from '../log.js'; import type { Task } from '../utils.js'; import { cancelAndWait, delay } from '../utils.js'; @@ -63,6 +63,7 @@ describe('Generation + Tool Execution', () => { // Tool that takes > 5 seconds let toolAborted = false; const getWeather = tool({ + name: 'getWeather', description: 'weather', parameters: z.object({ location: z.string() }), execute: async ({ location }, { abortSignal }) => { @@ -87,7 +88,7 @@ describe('Generation + Tool Execution', () => { const [execTask, toolOutput] = performToolExecutions({ session: {} as any, speechHandle: { id: 'speech_test', _itemAdded: () => {} } as any, - toolCtx: { getWeather } as any, + toolCtx: new ToolContext([getWeather]) as any, toolCallStream, controller: replyAbortController, onToolExecutionStarted: () => {}, @@ -115,6 +116,7 @@ describe('Generation + Tool Execution', () => { const replyAbortController = new AbortController(); const echo = tool({ + name: 'echo', description: 'echo', parameters: z.object({ msg: z.string() }), execute: async ({ msg }) => `echo: ${msg}`, @@ -130,7 +132,7 @@ describe('Generation + Tool Execution', () => { const [execTask, toolOutput] = performToolExecutions({ session: {} as any, speechHandle: { id: 'speech_test2', _itemAdded: () => {} } as any, - toolCtx: { echo } as any, + toolCtx: new ToolContext([echo]) as any, toolCallStream, controller: replyAbortController, }); @@ -147,6 +149,7 @@ describe('Generation + Tool Execution', () => { let aborted = false; const longOp = tool({ + name: 'longOp', description: 'longOp', parameters: z.object({ ms: z.number() }), execute: async ({ ms }, { 
abortSignal }) => { @@ -170,7 +173,7 @@ describe('Generation + Tool Execution', () => { const [execTask, toolOutput] = performToolExecutions({ session: {} as any, speechHandle: { id: 'speech_abort', _itemAdded: () => {} } as any, - toolCtx: { longOp } as any, + toolCtx: new ToolContext([longOp]) as any, toolCallStream, controller: replyAbortController, }); @@ -189,6 +192,7 @@ describe('Generation + Tool Execution', () => { const replyAbortController = new AbortController(); const echo = tool({ + name: 'echo', description: 'echo', parameters: z.object({ msg: z.string() }), execute: async ({ msg }) => `echo: ${msg}`, @@ -205,7 +209,7 @@ describe('Generation + Tool Execution', () => { const [execTask, toolOutput] = performToolExecutions({ session: {} as any, speechHandle: { id: 'speech_invalid', _itemAdded: () => {} } as any, - toolCtx: { echo } as any, + toolCtx: new ToolContext([echo]) as any, toolCallStream, controller: replyAbortController, }); @@ -220,11 +224,13 @@ describe('Generation + Tool Execution', () => { const replyAbortController = new AbortController(); const sum = tool({ + name: 'sum', description: 'sum', parameters: z.object({ a: z.number(), b: z.number() }), execute: async ({ a, b }) => a + b, }); const upper = tool({ + name: 'upper', description: 'upper', parameters: z.object({ s: z.string() }), execute: async ({ s }) => s.toUpperCase(), @@ -245,7 +251,7 @@ describe('Generation + Tool Execution', () => { const [execTask, toolOutput] = performToolExecutions({ session: {} as any, speechHandle: { id: 'speech_multi', _itemAdded: () => {} } as any, - toolCtx: { sum, upper } as any, + toolCtx: new ToolContext([sum, upper]) as any, toolCallStream, controller: replyAbortController, }); diff --git a/agents/src/voice/remote_session.ts b/agents/src/voice/remote_session.ts index f970b064e..ee1a3dab8 100644 --- a/agents/src/voice/remote_session.ts +++ b/agents/src/voice/remote_session.ts @@ -471,7 +471,7 @@ function sessionUsageToProto(usage: AgentSessionUsage): 
pb.AgentSessionUsage { function toolNames(toolCtx: ToolContext | undefined): string[] { if (!toolCtx) return []; - return Object.keys(toolCtx); + return Object.keys(toolCtx.functionTools); } function protoSerializeOptions(opts: { diff --git a/agents/src/voice/testing/fake_llm.ts b/agents/src/voice/testing/fake_llm.ts index ad3a1bf16..b6ba9b08a 100644 --- a/agents/src/voice/testing/fake_llm.ts +++ b/agents/src/voice/testing/fake_llm.ts @@ -4,7 +4,7 @@ import type { ChatContext } from '../../llm/chat_context.js'; import { FunctionCall } from '../../llm/chat_context.js'; import { LLMStream as BaseLLMStream, LLM, type LLMStream } from '../../llm/llm.js'; -import type { ToolChoice, ToolContext } from '../../llm/tool_context.js'; +import type { ToolChoice, ToolCtxInput } from '../../llm/tool_context.js'; import { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../../types.js'; import { delay } from '../../utils.js'; @@ -42,7 +42,7 @@ export class FakeLLM extends LLM { connOptions = DEFAULT_API_CONNECT_OPTIONS, }: { chatCtx: ChatContext; - toolCtx?: ToolContext; + toolCtx?: ToolCtxInput; connOptions?: APIConnectOptions; parallelToolCalls?: boolean; toolChoice?: ToolChoice; @@ -65,7 +65,7 @@ class FakeLLMStream extends BaseLLMStream { constructor( fake: FakeLLM, - params: { chatCtx: ChatContext; toolCtx?: ToolContext; connOptions: APIConnectOptions }, + params: { chatCtx: ChatContext; toolCtx?: ToolCtxInput; connOptions: APIConnectOptions }, ) { super(fake, params); this.fake = fake; diff --git a/agents/src/voice/testing/run_result.ts b/agents/src/voice/testing/run_result.ts index 4ee0ccc56..0e60d03ce 100644 --- a/agents/src/voice/testing/run_result.ts +++ b/agents/src/voice/testing/run_result.ts @@ -817,6 +817,7 @@ export class MessageAssert extends EventAssert { // Create the check_intent tool const checkIntentTool = tool({ + name: 'check_intent', description: 'Determines whether the message correctly fulfills the given intent. 
' + 'Returns success=true if the message satisfies the intent, false otherwise. ' + @@ -853,7 +854,7 @@ export class MessageAssert extends EventAssert { const stream = llm.chat({ chatCtx, - toolCtx: { check_intent: checkIntentTool }, + toolCtx: [checkIntentTool], toolChoice: { type: 'function', function: { name: 'check_intent' } }, extraKwargs: { temperature: 0 }, }); diff --git a/examples/src/background_audio.ts b/examples/src/background_audio.ts index fc718ac0c..8d5884be9 100644 --- a/examples/src/background_audio.ts +++ b/examples/src/background_audio.ts @@ -35,6 +35,7 @@ export default defineAgent({ logger.info('Connected to room'); const searchWeb = llm.tool({ + name: 'searchWeb', description: 'Search the web for information based on the given query. Always use this function whenever the user requests a web search', parameters: z.object({ @@ -49,9 +50,7 @@ export default defineAgent({ const agent = new voice.Agent({ instructions: 'You are a helpful assistant', - tools: { - searchWeb, - }, + tools: [searchWeb], }); const session = new voice.AgentSession({ diff --git a/examples/src/basic_agent.ts b/examples/src/basic_agent.ts index 95ecddb9a..79e79808a 100644 --- a/examples/src/basic_agent.ts +++ b/examples/src/basic_agent.ts @@ -27,8 +27,9 @@ export default defineAgent({ const agent = new voice.Agent({ instructions: "You are a helpful assistant, you can hear the user's message and respond to it.", - tools: { - getWeather: llm.tool({ + tools: [ + llm.tool({ + name: 'getWeather', description: 'Get the weather for a given location.', parameters: z.object({ location: z.string().describe('The location to get the weather for'), @@ -37,7 +38,7 @@ export default defineAgent({ return `The weather in ${location} is sunny.`; }, }), - }, + ], }); const logger = log(); diff --git a/examples/src/basic_agent_task.ts b/examples/src/basic_agent_task.ts index aacbeee5c..c450a81bb 100644 --- a/examples/src/basic_agent_task.ts +++ b/examples/src/basic_agent_task.ts @@ -21,8 +21,9 
@@ class InfoTask extends voice.AgentTask { super({ instructions: `Collect the user's information. around ${info}. Once you have the information, call the saveUserInfo tool to save the information to the database IMMEDIATELY. DO NOT have chitchat with the user, just collect the information and call the saveUserInfo tool.`, tts: 'elevenlabs/eleven_turbo_v2_5', - tools: { - saveUserInfo: llm.tool({ + tools: [ + llm.tool({ + name: 'saveUserInfo', description: `Save the user's ${info} to database`, parameters: z.object({ [info]: z.string(), @@ -32,7 +33,7 @@ class InfoTask extends voice.AgentTask { return `Thanks, collected ${info} successfully: ${args[info]}`; }, }), - }, + ], }); } @@ -48,8 +49,9 @@ class SurveyAgent extends voice.Agent { super({ instructions: 'You orchestrate a short intro survey. Speak naturally and keep the interaction brief.', - tools: { - collectUserInfo: llm.tool({ + tools: [ + llm.tool({ + name: 'collectUserInfo', description: 'Call this when user want to provide some information to you', parameters: z.object({ key: z @@ -63,15 +65,17 @@ class SurveyAgent extends voice.Agent { return `Collected ${key} successfully: ${value}`; }, }), - transferToWeatherAgent: llm.tool({ + llm.tool({ + name: 'transferToWeatherAgent', description: 'Call this immediately after user want to know the weather', execute: async () => { const agent = new voice.Agent({ instructions: 'You are a weather agent. 
You are responsible for providing the weather information to the user.', tts: 'deepgram/aura-2', - tools: { - getWeather: llm.tool({ + tools: [ + llm.tool({ + name: 'getWeather', description: 'Get the weather for a given location', parameters: z.object({ location: z.string().describe('The location to get the weather for'), @@ -80,7 +84,8 @@ class SurveyAgent extends voice.Agent { return `The weather in ${location} is sunny today.`; }, }), - finishWeatherConversation: llm.tool({ + llm.tool({ + name: 'finishWeatherConversation', description: 'Call this when you want to finish the weather conversation', execute: async () => { return llm.handoff({ @@ -89,13 +94,13 @@ class SurveyAgent extends voice.Agent { }); }, }), - }, + ], }); return llm.handoff({ agent, returns: "Let's start the weather conversation!" }); }, }), - }, + ], }); } diff --git a/examples/src/basic_task_group.ts b/examples/src/basic_task_group.ts index d40befe2a..3d8e04464 100644 --- a/examples/src/basic_task_group.ts +++ b/examples/src/basic_task_group.ts @@ -25,8 +25,9 @@ class CollectNameTask extends voice.AgentTask { instructions: 'Collect the user name from the latest user message. As soon as you have it, call save_name.', tts: taskTts, - tools: { - save_name: llm.tool({ + tools: [ + llm.tool({ + name: 'save_name', description: 'Save the user name.', parameters: z.object({ name: z.string().describe('The user name'), @@ -36,7 +37,7 @@ class CollectNameTask extends voice.AgentTask { return `Saved name: ${name}`; }, }), - }, + ], }); } @@ -54,8 +55,9 @@ class CollectEmailTask extends voice.AgentTask { instructions: 'Collect the user email from the latest user message. 
As soon as you have it, call save_email.', tts: taskTts, - tools: { - save_email: llm.tool({ + tools: [ + llm.tool({ + name: 'save_email', description: 'Save the user email.', parameters: z.object({ email: z.string().describe('The user email'), @@ -65,7 +67,7 @@ class CollectEmailTask extends voice.AgentTask { return `Saved email: ${email}`; }, }), - }, + ], }); } @@ -82,8 +84,9 @@ class TaskGroupDemoAgent extends voice.Agent { super({ instructions: 'You are onboarding assistant. When user asks to begin onboarding, call startOnboarding exactly once.', - tools: { - startOnboarding: llm.tool({ + tools: [ + llm.tool({ + name: 'startOnboarding', description: 'Start a two-step onboarding flow (name then email).', parameters: z.object({}), execute: async () => { @@ -107,7 +110,7 @@ class TaskGroupDemoAgent extends voice.Agent { return JSON.stringify(result.taskResults); }, }), - }, + ], }); } diff --git a/examples/src/basic_tool_call_agent.ts b/examples/src/basic_tool_call_agent.ts index 5642ef488..9a18de362 100644 --- a/examples/src/basic_tool_call_agent.ts +++ b/examples/src/basic_tool_call_agent.ts @@ -44,6 +44,7 @@ export default defineAgent({ }, entry: async (ctx: JobContext) => { const getWeather = llm.tool({ + name: 'getWeather', description: ' Called when the user asks about the weather.', parameters: z.object({ location: z.string().describe('The location to get the weather for'), @@ -55,6 +56,7 @@ export default defineAgent({ }); const toggleLight = llm.tool({ + name: 'toggleLight', description: 'Called when the user asks to turn on or off the light.', parameters: z.object({ room: roomNameSchema.describe('The room to turn the light in'), @@ -70,6 +72,7 @@ export default defineAgent({ }); const getNumber = llm.tool({ + name: 'getNumber', description: 'Called when the user wants to get a number value, None if user want a random value', parameters: z.object({ @@ -87,6 +90,7 @@ export default defineAgent({ }); const checkStoredNumber = llm.tool({ + name: 
'checkStoredNumber', description: 'Called when the user wants to check the stored number.', execute: async (_, { ctx }: llm.ToolOptions) => { return `The stored number is ${ctx.userData.number}.`; @@ -94,6 +98,7 @@ export default defineAgent({ }); const updateStoredNumber = llm.tool({ + name: 'updateStoredNumber', description: 'Called when the user wants to update the stored number.', parameters: z.object({ number: z.number().describe('The number to update the stored number to'), @@ -106,31 +111,33 @@ export default defineAgent({ const routerAgent = new RouterAgent({ instructions: 'You are a helpful assistant.', - tools: { + tools: [ getWeather, toggleLight, - playGame: llm.tool({ + llm.tool({ + name: 'playGame', description: 'Called when the user wants to play a game (transfer user to a game agent).', execute: async (): Promise => { return llm.handoff({ agent: gameAgent, returns: 'The game is now playing.' }); }, }), - }, + ], }); const gameAgent = new GameAgent({ instructions: 'You are a game agent. You are playing a game with the user.', - tools: { + tools: [ getNumber, checkStoredNumber, updateStoredNumber, - finishGame: llm.tool({ + llm.tool({ + name: 'finishGame', description: 'Called when the user wants to finish the game.', execute: async () => { return llm.handoff({ agent: routerAgent, returns: 'The game is now finished.' }); }, }), - }, + ], }); const vad = ctx.proc.userData.vad! 
as silero.VAD; diff --git a/examples/src/comprehensive_test.ts b/examples/src/comprehensive_test.ts index ddebfc4a7..f3b4974f0 100644 --- a/examples/src/comprehensive_test.ts +++ b/examples/src/comprehensive_test.ts @@ -76,8 +76,9 @@ class MainAgent extends voice.Agent { tts: ttsOptions['elevenlabs'](), llm: llmOptions['openai'](), turnDetection: eouOptions['multilingual'](), - tools: { - testAgent: llm.tool({ + tools: [ + llm.tool({ + name: 'testAgent', description: 'Called when user want to test an agent with STT, TTS, EOU, LLM, and optionally realtime LLM configuration', parameters: z.object({ @@ -102,7 +103,7 @@ class MainAgent extends voice.Agent { }); }, }), - }, + ], }); } @@ -159,8 +160,9 @@ class TestAgent extends voice.Agent { tts: tts, llm: realtimeModel ?? model, turnDetection: eou, - tools: { - testTool: llm.tool({ + tools: [ + llm.tool({ + name: 'testTool', description: "Testing agent's tool calling ability", parameters: z .object({ @@ -173,7 +175,8 @@ class TestAgent extends voice.Agent { }; }, }), - nextAgent: llm.tool({ + llm.tool({ + name: 'nextAgent', description: 'Called when user confirm current agent is working and want to proceed to next agent', parameters: z.object({ @@ -204,7 +207,7 @@ class TestAgent extends voice.Agent { }); }, }), - }, + ], }); this.sttChoice = sttChoice; diff --git a/examples/src/drive-thru/drivethru_agent.ts b/examples/src/drive-thru/drivethru_agent.ts index 9882f6fcd..e684f5c00 100644 --- a/examples/src/drive-thru/drivethru_agent.ts +++ b/examples/src/drive-thru/drivethru_agent.ts @@ -57,23 +57,24 @@ export class DriveThruAgent extends voice.Agent { super({ instructions, - tools: { - orderComboMeal: DriveThruAgent.buildComboOrderTool( + tools: [ + DriveThruAgent.buildComboOrderTool( userdata.comboItems, userdata.drinkItems, userdata.sauceItems, ), - orderHappyMeal: DriveThruAgent.buildHappyOrderTool( + DriveThruAgent.buildHappyOrderTool( userdata.happyItems, userdata.drinkItems, userdata.sauceItems, ), - 
orderRegularItem: DriveThruAgent.buildRegularOrderTool( + DriveThruAgent.buildRegularOrderTool( userdata.regularItems, userdata.drinkItems, userdata.sauceItems, ), - removeOrderItem: llm.tool({ + llm.tool({ + name: 'removeOrderItem', description: `Removes one or more items from the user's order using their \`orderId\`s. Useful when the user asks to cancel or delete existing items (e.g., "Remove the cheeseburger"). @@ -100,7 +101,8 @@ If the \`orderId\`s are unknown, call \`listOrderItems\` first to retrieve them. return 'Removed items:\n' + removedItems.map((item) => JSON.stringify(item)).join('\n'); }, }), - listOrderItems: llm.tool({ + llm.tool({ + name: 'listOrderItems', description: `Retrieves the current list of items in the user's order, including each item's internal \`orderId\`. Helpful when: @@ -120,7 +122,7 @@ Examples: return items.map((item) => JSON.stringify(item)).join('\n'); }, }), - }, + ], }); } @@ -134,6 +136,7 @@ Examples: const availableSauceIds = [...new Set(sauceItems.map((item) => item.id))]; return llm.tool({ + name: 'orderComboMeal', description: `Call this when the user orders a **Combo Meal**, like: "Number 4b with a large Sprite" or "I'll do a medium meal." Do not call this tool unless the user clearly refers to a known combo meal by name or number. @@ -222,6 +225,7 @@ If the user says just "a large meal," assume both drink and fries are that size. const availableSauceIds = [...new Set(sauceItems.map((item) => item.id))]; return llm.tool({ + name: 'orderHappyMeal', description: `Call this when the user orders a **Happy Meal**, typically for children. These meals come with a main item, a drink, and a sauce. The user must clearly specify a valid Happy Meal option (e.g., "Can I get a Happy Meal?"). 
@@ -299,6 +303,7 @@ Assume Small as default only if the user says "Happy Meal" and gives no size pre const availableIds = [...new Set(allItems.map((item) => item.id))]; return llm.tool({ + name: 'orderRegularItem', description: `Call this when the user orders **a single item on its own**, not as part of a Combo Meal or Happy Meal. The customer must provide clear and specific input. For example, item variants such as flavor must **always** be explicitly stated. diff --git a/examples/src/frontdesk/frontdesk_agent.ts b/examples/src/frontdesk/frontdesk_agent.ts index d5d2e1ab1..2fa60ba57 100644 --- a/examples/src/frontdesk/frontdesk_agent.ts +++ b/examples/src/frontdesk/frontdesk_agent.ts @@ -59,8 +59,9 @@ export class FrontDeskAgent extends voice.Agent { super({ instructions, - tools: { - scheduleAppointment: llm.tool({ + tools: [ + llm.tool({ + name: 'scheduleAppointment', description: 'Schedule an appointment at the given slot.', parameters: z.object({ slotId: z @@ -110,7 +111,8 @@ export class FrontDeskAgent extends voice.Agent { return `The appointment was successfully scheduled for ${formatted}.`; }, }), - listAvailableSlots: llm.tool({ + llm.tool({ + name: 'listAvailableSlots', description: `Return a plain-text list of available slots, one per line. 
- , , at () @@ -188,7 +190,7 @@ You must infer the appropriate range implicitly from the conversational context return lines.join('\n') || 'No slots available at the moment.'; }, }), - }, + ], }); this.tz = options.timezone; diff --git a/examples/src/gemini_realtime_agent.ts b/examples/src/gemini_realtime_agent.ts index dd22db51f..d70368352 100644 --- a/examples/src/gemini_realtime_agent.ts +++ b/examples/src/gemini_realtime_agent.ts @@ -47,6 +47,7 @@ type StoryData = { const roomNameSchema = z.enum(['bedroom', 'living room', 'kitchen', 'bathroom', 'office']); const getWeather = llm.tool({ + name: 'getWeather', description: 'Called when the user asks about the weather.', parameters: z.object({ location: z.string().describe('The location to get the weather for'), @@ -59,6 +60,7 @@ const getWeather = llm.tool({ }); const toggleLight = llm.tool({ + name: 'toggleLight', description: 'Called when the user asks to turn on or off the light.', parameters: z.object({ room: roomNameSchema.describe('The room to turn the light in'), @@ -79,15 +81,16 @@ class IntroAgent extends voice.Agent { static create() { return new IntroAgent({ instructions: `You are a story teller. Your goal is to gather a few pieces of information from the user to make the story personalized and engaging. 
Ask the user for their name and where they are from.`, - tools: { - informationGathered: llm.tool({ + tools: [ + llm.tool({ + name: 'informationGathered', description: 'Called when the user has provided the information needed to make the story personalized and engaging.', parameters: z.object({ name: z.string().describe('The name of the user'), location: z.string().describe('The location of the user'), }), - execute: async ({ name, location }, { ctx }) => { + execute: async ({ name, location }, { ctx }: llm.ToolOptions) => { ctx.userData.name = name; ctx.userData.location = location; @@ -97,7 +100,7 @@ class IntroAgent extends voice.Agent { }), getWeather, toggleLight, - }, + ], }); } } diff --git a/examples/src/instructions_per_modality.ts b/examples/src/instructions_per_modality.ts index 71f2f3f04..643e3d433 100644 --- a/examples/src/instructions_per_modality.ts +++ b/examples/src/instructions_per_modality.ts @@ -57,8 +57,9 @@ class SchedulingAgent extends voice.Agent { super({ instructions, - tools: { - bookAppointment: llm.tool({ + tools: [ + llm.tool({ + name: 'bookAppointment', description: 'Book an appointment.', parameters: z.object({ date: z.string().describe('The date of the appointment in the format YYYY-MM-DD'), @@ -69,7 +70,7 @@ class SchedulingAgent extends voice.Agent { return `Appointment booked for ${date} at ${time}`; }, }), - }, + ], }); } diff --git a/examples/src/llm_fallback_adapter.ts b/examples/src/llm_fallback_adapter.ts index d053464dc..d86708548 100644 --- a/examples/src/llm_fallback_adapter.ts +++ b/examples/src/llm_fallback_adapter.ts @@ -71,8 +71,9 @@ export default defineAgent({ const agent = new voice.Agent({ instructions: 'You are a helpful assistant. 
Demonstrate that you are working by responding to user queries.', - tools: { - getWeather: llm.tool({ + tools: [ + llm.tool({ + name: 'getWeather', description: 'Get the weather for a given location.', parameters: z.object({ location: z.string().describe('The location to get the weather for'), @@ -81,7 +82,7 @@ export default defineAgent({ return `The weather in ${location} is sunny with a temperature of 72°F.`; }, }), - }, + ], }); const session = new voice.AgentSession({ diff --git a/examples/src/manual_shutdown.ts b/examples/src/manual_shutdown.ts index 96bedb901..edb21b87c 100644 --- a/examples/src/manual_shutdown.ts +++ b/examples/src/manual_shutdown.ts @@ -25,8 +25,9 @@ export default defineAgent({ const agent = new voice.Agent({ instructions: "You are a helpful assistant, you can hear the user's message and respond to it, end the call when the user asks you to.", - tools: { - getWeather: llm.tool({ + tools: [ + llm.tool({ + name: 'getWeather', description: 'Get the weather for a given location.', parameters: z.object({ location: z.string().describe('The location to get the weather for'), @@ -35,7 +36,8 @@ export default defineAgent({ return `The weather in ${location} is sunny.`; }, }), - endCall: llm.tool({ + llm.tool({ + name: 'endCall', description: 'End the call.', parameters: z.object({ reason: z @@ -56,7 +58,7 @@ export default defineAgent({ session.shutdown({ reason }); }, }), - }, + ], }); const session = new voice.AgentSession({ diff --git a/examples/src/multi_agent.ts b/examples/src/multi_agent.ts index 7f4819bed..12d87377d 100644 --- a/examples/src/multi_agent.ts +++ b/examples/src/multi_agent.ts @@ -35,15 +35,16 @@ class IntroAgent extends voice.Agent { static create() { return new IntroAgent({ instructions: `You are a story teller. Your goal is to gather a few pieces of information from the user to make the story personalized and engaging. 
Ask the user for their name and where they are from.`, - tools: { - informationGathered: llm.tool({ + tools: [ + llm.tool({ + name: 'informationGathered', description: 'Called when the user has provided the information needed to make the story personalized and engaging.', parameters: z.object({ name: z.string().describe('The name of the user'), location: z.string().describe('The location of the user'), }), - execute: async ({ name, location }, { ctx }) => { + execute: async ({ name, location }, { ctx }: llm.ToolOptions) => { ctx.userData.name = name; ctx.userData.location = location; @@ -51,7 +52,7 @@ class IntroAgent extends voice.Agent { return llm.handoff({ agent: storyAgent, returns: "Let's start the story!" }); }, }), - }, + ], }); } } diff --git a/examples/src/phonic_realtime_agent.ts b/examples/src/phonic_realtime_agent.ts index 35a038450..934485f5b 100644 --- a/examples/src/phonic_realtime_agent.ts +++ b/examples/src/phonic_realtime_agent.ts @@ -7,6 +7,7 @@ import { fileURLToPath } from 'node:url'; import { z } from 'zod'; const toggleLight = llm.tool({ + name: 'toggle_light', description: 'Toggle a light on or off. 
Available lights are A05, A06, A07, and A08.', parameters: z.object({ light_id: z.string().describe('The ID of the light to toggle'), @@ -23,9 +24,7 @@ export default defineAgent({ entry: async (ctx: JobContext) => { const agent = new voice.Agent({ instructions: 'You are a helpful voice AI assistant named Alex.', - tools: { - toggle_light: toggleLight, - }, + tools: [toggleLight], }); const session = new voice.AgentSession({ diff --git a/examples/src/raw_function_description.ts b/examples/src/raw_function_description.ts index 6548fd011..1e81c65d9 100644 --- a/examples/src/raw_function_description.ts +++ b/examples/src/raw_function_description.ts @@ -18,8 +18,9 @@ import { fileURLToPath } from 'node:url'; function createRawFunctionAgent() { return new voice.Agent({ instructions: 'You are a helpful assistant.', - tools: { - openGate: llm.tool({ + tools: [ + llm.tool({ + name: 'openGate', description: 'Opens a specified gate from a predefined set of access points.', parameters: { type: 'object', @@ -43,7 +44,7 @@ function createRawFunctionAgent() { return `The gate ${gateId} is now open.`; }, }), - }, + ], }); } diff --git a/examples/src/realtime_agent.ts b/examples/src/realtime_agent.ts index b30171776..647d5759b 100644 --- a/examples/src/realtime_agent.ts +++ b/examples/src/realtime_agent.ts @@ -24,6 +24,7 @@ export default defineAgent({ }, entry: async (ctx: JobContext) => { const getWeather = llm.tool({ + name: 'getWeather', description: ' Called when the user asks about the weather.', parameters: z.object({ location: z.string().describe('The location to get the weather for'), @@ -34,6 +35,7 @@ export default defineAgent({ }); const toggleLight = llm.tool({ + name: 'toggleLight', description: 'Called when the user asks to turn on or off the light.', parameters: z.object({ room: roomNameSchema.describe('The room to turn the light in'), @@ -65,10 +67,7 @@ export default defineAgent({ instructions: "You are a helpful assistant created by LiveKit, always speaking 
English, you can hear the user's message and respond to it.", chatCtx, - tools: { - getWeather, - toggleLight, - }, + tools: [getWeather, toggleLight], }); const session = new voice.AgentSession({ diff --git a/examples/src/realtime_with_tts.ts b/examples/src/realtime_with_tts.ts index d87db7853..88c450f06 100644 --- a/examples/src/realtime_with_tts.ts +++ b/examples/src/realtime_with_tts.ts @@ -26,6 +26,7 @@ export default defineAgent({ const logger = log(); const getWeather = llm.tool({ + name: 'getWeather', description: 'Called when the user asks about the weather.', parameters: z.object({ location: z.string().describe('The location to get the weather for'), @@ -38,9 +39,7 @@ export default defineAgent({ const agent = new voice.Agent({ instructions: 'You are a helpful assistant. Always speak in English.', - tools: { - getWeather, - }, + tools: [getWeather], }); const session = new voice.AgentSession({ diff --git a/examples/src/restaurant_agent.ts b/examples/src/restaurant_agent.ts index d9faaf9a5..4fb5281ba 100644 --- a/examples/src/restaurant_agent.ts +++ b/examples/src/restaurant_agent.ts @@ -79,6 +79,7 @@ function summarize({ } const updateName = llm.tool({ + name: 'updateName', description: 'Called when the user provides their name. Confirm the spelling with the user before calling the function.', parameters: z.object({ @@ -91,6 +92,7 @@ const updateName = llm.tool({ }); const updatePhone = llm.tool({ + name: 'updatePhone', description: 'Called when the user provides their phone number. 
Confirm the spelling with the user before calling the function.', parameters: z.object({ @@ -103,6 +105,7 @@ const updatePhone = llm.tool({ }); const toGreeter = llm.tool({ + name: 'toGreeter', description: 'Called when user asks any unrelated questions or requests any other services not in your job description.', execute: async (_, { ctx }: llm.ToolOptions) => { @@ -173,35 +176,37 @@ function createGreeterAgent(menu: string) { instructions: `You are a friendly restaurant receptionist. The menu is: ${menu}\nYour jobs are to greet the caller and understand if they want to make a reservation or order takeaway. Guide them to the right agent using tools.`, llm: new inference.LLM({ model: 'openai/gpt-4.1-mini' }), tts: new inference.TTS({ model: 'cartesia/sonic-3', voice: voices.greeter }), - tools: { - toReservation: llm.tool({ + tools: [ + llm.tool({ + name: 'toReservation', description: dedent` Called when user wants to make or update a reservation. This function handles transitioning to the reservation agent who will collect the necessary details like reservation time, customer name and phone number. `, - execute: async (_, { ctx }): Promise => { + execute: async (_, { ctx }: llm.ToolOptions): Promise => { return await greeter.transferToAgent({ name: 'reservation', ctx, }); }, }), - toTakeaway: llm.tool({ + llm.tool({ + name: 'toTakeaway', description: dedent` Called when the user wants to place a takeaway order. This includes handling orders for pickup, delivery, or when the user wants to proceed to checkout with their existing order. `, - execute: async (_, { ctx }): Promise => { + execute: async (_, { ctx }: llm.ToolOptions): Promise => { return await greeter.transferToAgent({ name: 'takeaway', ctx, }); }, }), - }, + ], }); return greeter; @@ -212,11 +217,12 @@ function createReservationAgent() { name: 'reservation', instructions: `You are a reservation agent at a restaurant. Your jobs are to ask for the reservation time, then customer's name, and phone number. 
Then confirm the reservation details with the customer.`, tts: new inference.TTS({ model: 'cartesia/sonic-3', voice: voices.reservation }), - tools: { + tools: [ updateName, updatePhone, toGreeter, - updateReservationTime: llm.tool({ + llm.tool({ + name: 'updateReservationTime', description: dedent` Called when the user provides their reservation time. Confirm the time with the user before calling the function. @@ -224,14 +230,18 @@ function createReservationAgent() { parameters: z.object({ time: z.string().describe('The reservation time'), }), - execute: async ({ time }, { ctx }) => { + execute: async ({ time }, { ctx }: llm.ToolOptions) => { ctx.userData.reservationTime = time; return `The reservation time is updated to ${time}`; }, }), - confirmReservation: llm.tool({ + llm.tool({ + name: 'confirmReservation', description: `Called when the user confirms the reservation.`, - execute: async (_, { ctx }): Promise => { + execute: async ( + _, + { ctx }: llm.ToolOptions, + ): Promise => { const userdata = ctx.userData; if (!userdata.customer.name || !userdata.customer.phone) { return 'Please provide your name and phone number first.'; @@ -245,7 +255,7 @@ function createReservationAgent() { }); }, }), - }, + ], }); return reservation; @@ -256,21 +266,26 @@ function createTakeawayAgent(menu: string) { name: 'takeaway', instructions: `Your are a takeaway agent that takes orders from the customer. 
Our menu is: ${menu}\nClarify special requests and confirm the order with the customer.`, tts: new inference.TTS({ model: 'cartesia/sonic-3', voice: voices.takeaway }), - tools: { + tools: [ toGreeter, - updateOrder: llm.tool({ + llm.tool({ + name: 'updateOrder', description: `Called when the user provides their order.`, parameters: z.object({ items: z.array(z.string()).describe('The items of the full order'), }), - execute: async ({ items }, { ctx }) => { + execute: async ({ items }, { ctx }: llm.ToolOptions) => { ctx.userData.order = items; return `The order is updated to ${items}`; }, }), - toCheckout: llm.tool({ + llm.tool({ + name: 'toCheckout', description: `Called when the user confirms the order.`, - execute: async (_, { ctx }): Promise => { + execute: async ( + _, + { ctx }: llm.ToolOptions, + ): Promise => { const userdata = ctx.userData; if (!userdata.order) { return 'No takeaway order found. Please make an order first.'; @@ -281,7 +296,7 @@ function createTakeawayAgent(menu: string) { }); }, }), - }, + ], }); return takeaway; @@ -292,21 +307,23 @@ function createCheckoutAgent(menu: string) { name: 'checkout', instructions: `You are a checkout agent at a restaurant. 
The menu is: ${menu}\nYour are responsible for confirming the expense of the order and then collecting customer's name, phone number and credit card information, including the card number, expiry date, and CVV step by step.`, tts: new inference.TTS({ model: 'cartesia/sonic-3', voice: voices.checkout }), - tools: { + tools: [ updateName, updatePhone, toGreeter, - confirmExpense: llm.tool({ + llm.tool({ + name: 'confirmExpense', description: `Called when the user confirms the expense.`, parameters: z.object({ expense: z.number().describe('The expense of the order'), }), - execute: async ({ expense }, { ctx }) => { + execute: async ({ expense }, { ctx }: llm.ToolOptions) => { ctx.userData.expense = expense; return `The expense is confirmed to be ${expense}`; }, }), - updateCreditCard: llm.tool({ + llm.tool({ + name: 'updateCreditCard', description: dedent` Called when the user provides their credit card number, expiry date, and CVV. Confirm the spelling with the user before calling the function. 
@@ -316,14 +333,18 @@ function createCheckoutAgent(menu: string) { expiry: z.string().describe('The expiry date of the credit card'), cvv: z.string().describe('The CVV of the credit card'), }), - execute: async ({ number, expiry, cvv }, { ctx }) => { + execute: async ({ number, expiry, cvv }, { ctx }: llm.ToolOptions) => { ctx.userData.creditCard = { number, expiry, cvv }; return `The credit card number is updated to ${number}`; }, }), - confirmCheckout: llm.tool({ + llm.tool({ + name: 'confirmCheckout', description: `Called when the user confirms the checkout.`, - execute: async (_, { ctx }): Promise => { + execute: async ( + _, + { ctx }: llm.ToolOptions, + ): Promise => { const userdata = ctx.userData; if (!userdata.expense) { return 'Please confirm the expense first.'; @@ -342,16 +363,17 @@ function createCheckoutAgent(menu: string) { }); }, }), - toTakeaway: llm.tool({ + llm.tool({ + name: 'toTakeaway', description: `Called when the user wants to update their order.`, - execute: async (_, { ctx }): Promise => { + execute: async (_, { ctx }: llm.ToolOptions): Promise => { return await checkout.transferToAgent({ name: 'takeaway', ctx, }); }, }), - }, + ], }); return checkout; diff --git a/examples/src/survey_agent.ts b/examples/src/survey_agent.ts index 8504fa1a7..918fd9859 100644 --- a/examples/src/survey_agent.ts +++ b/examples/src/survey_agent.ts @@ -76,6 +76,7 @@ async function writeCsvRow(path: string, data: Record): Promise function disqualifyTool() { return llm.tool({ + name: 'disqualify', description: 'End the interview if the candidate refuses to cooperate, provides inappropriate answers, or is not a fit.', parameters: z.object({ @@ -101,8 +102,9 @@ export class IntroTask extends voice.AgentTask { super({ instructions: 'You are Alex, an interviewer screening a software engineer candidate. 
Gather the candidate name and short self-introduction.', - tools: { - saveIntro: llm.tool({ + tools: [ + llm.tool({ + name: 'saveIntro', description: 'Save candidate name and intro notes.', parameters: z.object({ name: z.string().describe('Candidate name'), @@ -114,7 +116,7 @@ export class IntroTask extends voice.AgentTask { return `Saved intro for ${name}.`; }, }), - }, + ], }); } @@ -132,9 +134,10 @@ export class EmailTask extends voice.AgentTask { super({ instructions: 'Collect a valid email address. If the candidate refuses, call disqualify immediately.', - tools: { + tools: [ disqualify, - saveEmail: llm.tool({ + llm.tool({ + name: 'saveEmail', description: 'Save candidate email address.', parameters: z.object({ email: z.string().describe('Candidate email'), @@ -144,7 +147,7 @@ export class EmailTask extends voice.AgentTask { return `Saved email: ${email}`; }, }), - }, + ], }); } @@ -161,9 +164,10 @@ export class CommuteTask extends voice.AgentTask super({ instructions: 'Collect commute flexibility. The role expects office attendance three days per week.', - tools: { + tools: [ disqualify, - saveCommute: llm.tool({ + llm.tool({ + name: 'saveCommute', description: 'Save candidate commute information.', parameters: z.object({ canCommute: z.boolean().describe('Whether the candidate can commute to office'), @@ -176,7 +180,7 @@ export class CommuteTask extends voice.AgentTask return 'Saved commute flexibility.'; }, }), - }, + ], }); } @@ -194,9 +198,10 @@ export class ExperienceTask extends voice.AgentTask { super({ instructions: 'You are a survey interviewer for a software engineer screening. Be concise, professional, and natural. 
Call endScreening when the process is complete.', - tools: { - endScreening: llm.tool({ + tools: [ + llm.tool({ + name: 'endScreening', description: 'End interview and hang up.', execute: async (_, { ctx }: llm.ToolOptions) => { ctx.session.shutdown(); return 'Interview concluded.'; }, }), - }, + ], }); } diff --git a/examples/src/testing/agent_task.test.ts b/examples/src/testing/agent_task.test.ts index 163d232a1..38d2a85d9 100644 --- a/examples/src/testing/agent_task.test.ts +++ b/examples/src/testing/agent_task.test.ts @@ -148,8 +148,9 @@ describe('AgentTask examples', { timeout: 120_000 }, () => { super({ instructions: 'You are collecting a name and role. Extract both from user input and call recordIntro.', - tools: { - recordIntro: llm.tool({ + tools: [ + llm.tool({ + name: 'recordIntro', description: 'Record the name and role', parameters: z.object({ name: z.string().describe('User name'), @@ -160,7 +161,7 @@ describe('AgentTask examples', { timeout: 120_000 }, () => { return 'recorded'; }, }), - }, + ], }); } @@ -222,8 +223,9 @@ describe('AgentTask examples', { timeout: 120_000 }, () => { super({ instructions: 'When asked to capture email, ALWAYS call captureEmail exactly once, then respond briefly.', - tools: { - captureEmail: llm.tool({ + tools: [ + llm.tool({ + name: 'captureEmail', description: 'Capture an email by running a nested AgentTask.', parameters: z.object({}), execute: async () => { @@ -236,7 +238,7 @@ describe('AgentTask examples', { timeout: 120_000 }, () => { } }, }), - }, + ], }); } @@ -275,8 +277,9 @@ describe('AgentTask examples', { timeout: 120_000 }, () => { instructions: 'You are Alex, an interviewer. Extract the candidate name and a short intro from the latest user input. 
' + 'Use the tool recordIntro exactly once when both are available.', - tools: { - recordIntro: llm.tool({ + tools: [ + llm.tool({ + name: 'recordIntro', description: 'Record candidate name and intro summary.', parameters: z.object({ name: z.string().describe('Candidate name'), @@ -288,7 +291,7 @@ describe('AgentTask examples', { timeout: 120_000 }, () => { return 'Intro recorded.'; }, }), - }, + ], }); } @@ -305,8 +308,9 @@ describe('AgentTask examples', { timeout: 120_000 }, () => { super({ instructions: 'When the user asks to run the intro task, ALWAYS call collectIntroWithTask exactly once.', - tools: { - collectIntroWithTask: llm.tool({ + tools: [ + llm.tool({ + name: 'collectIntroWithTask', description: 'Launch the IntroTask and return the captured intro details.', parameters: z.object({}), execute: async () => { @@ -316,7 +320,7 @@ describe('AgentTask examples', { timeout: 120_000 }, () => { return JSON.stringify(result); }, }), - }, + ], }); } } diff --git a/examples/src/testing/basic_task_group.test.ts b/examples/src/testing/basic_task_group.test.ts index afbc7400e..5201a6c7c 100644 --- a/examples/src/testing/basic_task_group.test.ts +++ b/examples/src/testing/basic_task_group.test.ts @@ -82,8 +82,9 @@ class CollectNameTask extends voice.AgentTask { super({ instructions: 'Collect the user name from the latest user message. As soon as you have it, call save_name.', - tools: { - save_name: llm.tool({ + tools: [ + llm.tool({ + name: 'save_name', description: 'Save the user name.', parameters: z.object({ name: z.string().describe('The user name') }), execute: async ({ name }) => { @@ -91,7 +92,7 @@ class CollectNameTask extends voice.AgentTask { return `Saved name: ${name}`; }, }), - }, + ], }); this.ready = ready; } @@ -108,8 +109,9 @@ class CollectEmailTask extends voice.AgentTask { super({ instructions: 'Collect the user email from the latest user message. 
As soon as you have it, call save_email.', - tools: { - save_email: llm.tool({ + tools: [ + llm.tool({ + name: 'save_email', description: 'Save the user email.', parameters: z.object({ email: z.string().describe('The user email') }), execute: async ({ email }) => { @@ -117,7 +119,7 @@ class CollectEmailTask extends voice.AgentTask { return `Saved email: ${email}`; }, }), - }, + ], }); this.ready = ready; } diff --git a/examples/src/testing/run_result.test.ts b/examples/src/testing/run_result.test.ts index 583cbaffa..4169dea93 100644 --- a/examples/src/testing/run_result.test.ts +++ b/examples/src/testing/run_result.test.ts @@ -45,8 +45,9 @@ Response rules: - After ordering, confirm what was added (e.g., "I've added the burger to your order"). - When asked about sizes, always ask for clarification if not specified. - Be friendly and proactive in suggesting next steps.`, - tools: { - getWeather: llm.tool({ + tools: [ + llm.tool({ + name: 'getWeather', description: 'Get the current weather for a location', parameters: z.object({ location: z.string().describe('The city name'), @@ -59,14 +60,16 @@ Response rules: }); }, }), - getCurrentTime: llm.tool({ + llm.tool({ + name: 'getCurrentTime', description: 'Get the current time', parameters: z.object({}), execute: async () => { return '3:00 PM'; }, }), - orderItem: llm.tool({ + llm.tool({ + name: 'orderItem', description: 'Add an item to the order', parameters: z.object({ itemId: z.string().describe('The menu item ID'), @@ -84,7 +87,8 @@ Response rules: }); }, }), - getOrderStatus: llm.tool({ + llm.tool({ + name: 'getOrderStatus', description: 'Get the current order status', parameters: z.object({}), execute: async () => { @@ -98,7 +102,8 @@ Response rules: }); }, }), - getMenuItems: llm.tool({ + llm.tool({ + name: 'getMenuItems', description: 'Get available menu items and prices', parameters: z.object({ category: z @@ -128,7 +133,7 @@ Response rules: return JSON.stringify(menu); }, }), - }, + ], }); } } diff --git 
a/examples/src/testing/task_group.test.ts b/examples/src/testing/task_group.test.ts index 3d5afff06..188d46e59 100644 --- a/examples/src/testing/task_group.test.ts +++ b/examples/src/testing/task_group.test.ts @@ -260,8 +260,9 @@ describe('TaskGroup', { timeout: 120_000 }, () => { super({ instructions: 'Extract the user name from the latest user message. Call recordName immediately.', - tools: { - recordName: llm.tool({ + tools: [ + llm.tool({ + name: 'recordName', description: 'Record the user name', parameters: z.object({ name: z.string().describe('The user name') }), execute: async ({ name }) => { @@ -269,7 +270,7 @@ describe('TaskGroup', { timeout: 120_000 }, () => { return 'recorded'; }, }), - }, + ], }); } @@ -283,8 +284,9 @@ describe('TaskGroup', { timeout: 120_000 }, () => { super({ instructions: 'Extract an email address from the latest user message. Call recordEmail immediately.', - tools: { - recordEmail: llm.tool({ + tools: [ + llm.tool({ + name: 'recordEmail', description: 'Record the user email', parameters: z.object({ email: z.string().describe('The email address') }), execute: async ({ email }) => { @@ -292,7 +294,7 @@ describe('TaskGroup', { timeout: 120_000 }, () => { return 'recorded'; }, }), - }, + ], }); } @@ -400,8 +402,9 @@ describe('TaskGroup', { timeout: 120_000 }, () => { super({ instructions: 'Extract the user favorite color from the latest message. Call recordColor immediately.', - tools: { - recordColor: llm.tool({ + tools: [ + llm.tool({ + name: 'recordColor', description: 'Record favorite color', parameters: z.object({ color: z.string() }), execute: async ({ color }) => { @@ -409,7 +412,7 @@ describe('TaskGroup', { timeout: 120_000 }, () => { return 'recorded'; }, }), - }, + ], }); } @@ -423,8 +426,9 @@ describe('TaskGroup', { timeout: 120_000 }, () => { super({ instructions: 'Extract the user favorite food from the latest message. 
Call recordFood immediately.', - tools: { - recordFood: llm.tool({ + tools: [ + llm.tool({ + name: 'recordFood', description: 'Record favorite food', parameters: z.object({ food: z.string() }), execute: async ({ food }) => { @@ -432,7 +436,7 @@ describe('TaskGroup', { timeout: 120_000 }, () => { return 'recorded'; }, }), - }, + ], }); } diff --git a/examples/src/tool_call_disfluency.ts b/examples/src/tool_call_disfluency.ts index 8f92183a8..7e8018c99 100644 --- a/examples/src/tool_call_disfluency.ts +++ b/examples/src/tool_call_disfluency.ts @@ -39,6 +39,7 @@ export default defineAgent({ const vad = ctx.proc.userData.vad! as silero.VAD; const getWeather = llm.tool({ + name: 'getWeather', description: ' Called when the user asks about the weather.', parameters: z.object({ location: z.string().describe('The location to get the weather for'), @@ -55,9 +56,7 @@ export default defineAgent({ const agent = new VoiceAgent({ instructions: "You are a helpful assistant, you can hear the user's message and respond to it.", - tools: { - getWeather, - }, + tools: [getWeather], }); const session = new voice.AgentSession({ diff --git a/examples/src/xai-realtime.ts b/examples/src/xai-realtime.ts index ad383a0a6..a5130a640 100644 --- a/examples/src/xai-realtime.ts +++ b/examples/src/xai-realtime.ts @@ -10,8 +10,9 @@ export default defineAgent({ entry: async (ctx: JobContext) => { const agent = new voice.Agent({ instructions: 'You are a helpful assistant. 
Keep your responses short and concise.', - tools: { - getWeather: llm.tool({ + tools: [ + llm.tool({ + name: 'getWeather', description: 'Get the weather for a given location.', parameters: z.object({ location: z.string().describe('The location to get the weather for'), @@ -20,7 +21,7 @@ export default defineAgent({ return `The weather in ${location} is sunny.`; }, }), - }, + ], }); const session = new voice.AgentSession({ diff --git a/plugins/baseten/src/llm.ts b/plugins/baseten/src/llm.ts index 4039b7cac..179a09344 100644 --- a/plugins/baseten/src/llm.ts +++ b/plugins/baseten/src/llm.ts @@ -72,19 +72,20 @@ export class OpenAILLM extends llm.LLM { chat({ chatCtx, - toolCtx, + toolCtx: toolCtxInput, connOptions = DEFAULT_API_CONNECT_OPTIONS, parallelToolCalls, toolChoice, extraKwargs, }: { chatCtx: llm.ChatContext; - toolCtx?: llm.ToolContext; + toolCtx?: llm.ToolCtxInput; connOptions?: APIConnectOptions; parallelToolCalls?: boolean; toolChoice?: llm.ToolChoice; extraKwargs?: Record; }): inference.LLMStream { + const toolCtx = llm.toToolContext(toolCtxInput); const extras: Record = { ...extraKwargs }; if (this.#opts.metadata) { @@ -125,7 +126,11 @@ export class OpenAILLM extends llm.LLM { parallelToolCalls = parallelToolCalls !== undefined ? 
parallelToolCalls : this.#opts.parallelToolCalls; - if (toolCtx && Object.keys(toolCtx).length > 0 && parallelToolCalls !== undefined) { + if ( + toolCtx && + Object.keys(toolCtx.functionTools).length > 0 && + parallelToolCalls !== undefined + ) { extras.parallel_tool_calls = parallelToolCalls; } diff --git a/plugins/cerebras/src/llm.test.ts b/plugins/cerebras/src/llm.test.ts index 3a0a3ca8b..dda5d7b3c 100644 --- a/plugins/cerebras/src/llm.test.ts +++ b/plugins/cerebras/src/llm.test.ts @@ -88,8 +88,9 @@ class WeatherAgent extends voice.Agent { constructor() { super({ instructions: 'You are a helpful assistant.', - tools: { - get_weather: llm.tool({ + tools: [ + llm.tool({ + name: 'get_weather', description: 'Get the current weather for a location.', parameters: z.object({ location: z.string().describe('The city name'), @@ -98,7 +99,7 @@ class WeatherAgent extends voice.Agent { return `The weather in ${location} is sunny, 72°F.`; }, }), - }, + ], }); } } diff --git a/plugins/google/src/beta/realtime/realtime_api.ts b/plugins/google/src/beta/realtime/realtime_api.ts index 8b9ada6eb..66bc6a7f9 100644 --- a/plugins/google/src/beta/realtime/realtime_api.ts +++ b/plugins/google/src/beta/realtime/realtime_api.ts @@ -451,7 +451,7 @@ export class RealtimeModel extends llm.RealtimeModel { * supporting both text and audio modalities with function calling capabilities. 
*/ export class RealtimeSession extends llm.RealtimeSession { - private _tools: llm.ToolContext = {}; + private _tools: llm.ToolContext = llm.ToolContext.empty(); private _chatCtx = llm.ChatContext.empty(); private options: RealtimeOptions; @@ -780,7 +780,7 @@ export class RealtimeSession extends llm.RealtimeSession { } get tools(): llm.ToolContext { - return { ...this._tools }; + return this._tools.copy(); } get manualActivityDetection(): boolean { diff --git a/plugins/google/src/llm.ts b/plugins/google/src/llm.ts index 302b679af..e452b70d2 100644 --- a/plugins/google/src/llm.ts +++ b/plugins/google/src/llm.ts @@ -189,20 +189,21 @@ export class LLM extends llm.LLM { chat({ chatCtx, - toolCtx, + toolCtx: toolCtxInput, connOptions = DEFAULT_API_CONNECT_OPTIONS, toolChoice, extraKwargs, geminiTools, }: { chatCtx: llm.ChatContext; - toolCtx?: llm.ToolContext; + toolCtx?: llm.ToolCtxInput; connOptions?: APIConnectOptions; parallelToolCalls?: boolean; toolChoice?: llm.ToolChoice; extraKwargs?: Record; geminiTools?: LLMTools; }): LLMStream { + const toolCtx = llm.toToolContext(toolCtxInput); const extras: GenerateContentConfig = { ...extraKwargs } as GenerateContentConfig; toolChoice = toolChoice !== undefined ? toolChoice : this.#opts.toolChoice; @@ -218,7 +219,7 @@ export class LLM extends llm.LLM { }, }; } else if (toolChoice === 'required') { - const toolNames = Object.entries(toolCtx || {}).map(([name]) => name); + const toolNames = Object.keys(toolCtx?.functionTools ?? 
{}); geminiToolConfig = { functionCallingConfig: { mode: FunctionCallingConfigMode.ANY, diff --git a/plugins/google/src/utils.ts b/plugins/google/src/utils.ts index 732ae0c3d..5548c076e 100644 --- a/plugins/google/src/utils.ts +++ b/plugins/google/src/utils.ts @@ -139,7 +139,7 @@ function isEmptyObjectSchema(jsonSchema: JSONSchema7Definition): boolean { export function toFunctionDeclarations(toolCtx: llm.ToolContext): FunctionDeclaration[] { const functionDeclarations: FunctionDeclaration[] = []; - for (const [name, tool] of Object.entries(toolCtx)) { + for (const [name, tool] of Object.entries(toolCtx.functionTools)) { const { description, parameters } = tool; const jsonSchema = llm.toJsonSchema(parameters, false); diff --git a/plugins/mistralai/src/llm.ts b/plugins/mistralai/src/llm.ts index f0c07f7a2..f6685b042 100644 --- a/plugins/mistralai/src/llm.ts +++ b/plugins/mistralai/src/llm.ts @@ -123,7 +123,7 @@ export class LLM extends llm.LLM { extraKwargs, }: { chatCtx: llm.ChatContext; - toolCtx?: llm.ToolContext; + toolCtx?: llm.ToolCtxInput; connOptions?: APIConnectOptions; parallelToolCalls?: boolean; toolChoice?: llm.ToolChoice; @@ -187,7 +187,7 @@ export class LLMStream extends llm.LLMStream { client: Mistral; opts: LLMOpts; chatCtx: llm.ChatContext; - toolCtx?: llm.ToolContext; + toolCtx?: llm.ToolCtxInput; connOptions: APIConnectOptions; extraKwargs: Record; }, @@ -211,8 +211,8 @@ export class LLMStream extends llm.LLMStream { // eslint-disable-next-line @typescript-eslint/no-explicit-any const toolsList: any[] = []; - if (this.toolCtx && Object.keys(this.toolCtx).length > 0) { - for (const [name, func] of Object.entries(this.toolCtx)) { + if (this.toolCtx && Object.keys(this.toolCtx.functionTools).length > 0) { + for (const [name, func] of Object.entries(this.toolCtx.functionTools)) { toolsList.push({ type: 'function' as const, function: { diff --git a/plugins/openai/src/llm.ts b/plugins/openai/src/llm.ts index e96551011..a358f5abd 100644 --- 
a/plugins/openai/src/llm.ts +++ b/plugins/openai/src/llm.ts @@ -468,19 +468,20 @@ export class LLM extends llm.LLM { chat({ chatCtx, - toolCtx, + toolCtx: toolCtxInput, connOptions = DEFAULT_API_CONNECT_OPTIONS, parallelToolCalls, toolChoice, extraKwargs, }: { chatCtx: llm.ChatContext; - toolCtx?: llm.ToolContext; + toolCtx?: llm.ToolCtxInput; connOptions?: APIConnectOptions; parallelToolCalls?: boolean; toolChoice?: llm.ToolChoice; extraKwargs?: Record; }): LLMStream { + const toolCtx = llm.toToolContext(toolCtxInput); const extras: Record = { ...extraKwargs }; if (this.#opts.metadata) { @@ -509,7 +510,11 @@ export class LLM extends llm.LLM { parallelToolCalls = parallelToolCalls !== undefined ? parallelToolCalls : this.#opts.parallelToolCalls; - if (toolCtx && Object.keys(toolCtx).length > 0 && parallelToolCalls !== undefined) { + if ( + toolCtx && + Object.keys(toolCtx.functionTools).length > 0 && + parallelToolCalls !== undefined + ) { extras.parallel_tool_calls = parallelToolCalls; } diff --git a/plugins/openai/src/realtime/realtime_model.ts b/plugins/openai/src/realtime/realtime_model.ts index 3a6a82095..8481e0f47 100644 --- a/plugins/openai/src/realtime/realtime_model.ts +++ b/plugins/openai/src/realtime/realtime_model.ts @@ -413,7 +413,7 @@ function processBaseURL({ * - openai_client_event_queued: expose the raw client events sent to the OpenAI Realtime API */ export class RealtimeSession extends llm.RealtimeSession { - private _tools: llm.ToolContext = {}; + private _tools: llm.ToolContext = llm.ToolContext.empty(); private remoteChatCtx: llm.RemoteChatContext = new llm.RemoteChatContext(); private messageChannel = new Queue(); private inputResampler?: AudioResampler; @@ -536,7 +536,7 @@ export class RealtimeSession extends llm.RealtimeSession { } get tools() { - return { ...this._tools } as llm.ToolContext; + return this._tools.copy(); } async updateChatCtx(_chatCtx: llm.ChatContext): Promise { @@ -698,13 +698,11 @@ export class RealtimeSession extends 
llm.RealtimeSession { // TODO(brian): these logics below are noops I think, leaving it here to keep // parity with the python but we should remove them later const retainedToolNames = new Set(ev.session.tools.map((tool) => tool.name)); - const retainedTools = Object.fromEntries( - Object.entries(_tools).filter( - ([name, tool]) => llm.isFunctionTool(tool) && retainedToolNames.has(name), - ), - ); + const retainedTools = Object.entries(_tools.functionTools) + .filter(([name]) => retainedToolNames.has(name)) + .map(([, tool]) => tool); - this._tools = retainedTools as llm.ToolContext; + this._tools = new llm.ToolContext(retainedTools); unlock(); } @@ -712,12 +710,7 @@ export class RealtimeSession extends llm.RealtimeSession { private createToolsUpdateEvent(_tools: llm.ToolContext): api_proto.SessionUpdateEvent { const oaiTools: api_proto.Tool[] = []; - for (const [name, tool] of Object.entries(_tools)) { - if (!llm.isFunctionTool(tool)) { - this.#logger.error({ name, tool }, "OpenAI Realtime API doesn't support this tool type"); - continue; - } - + for (const [name, tool] of Object.entries(_tools.functionTools)) { const { parameters: toolParameters, description } = tool; try { const parameters = llm.toJsonSchema( @@ -998,7 +991,7 @@ export class RealtimeSession extends llm.RealtimeSession { events.push(this.createSessionUpdateEvent()); // tools - if (Object.keys(this._tools).length > 0) { + if (Object.keys(this._tools.functionTools).length > 0) { events.push(this.createToolsUpdateEvent(this._tools)); } diff --git a/plugins/openai/src/responses/llm.ts b/plugins/openai/src/responses/llm.ts index 494a05c87..9a255d046 100644 --- a/plugins/openai/src/responses/llm.ts +++ b/plugins/openai/src/responses/llm.ts @@ -77,25 +77,30 @@ class ResponsesHttpLLM extends llm.LLM { override chat({ chatCtx, - toolCtx, + toolCtx: toolCtxInput, connOptions = DEFAULT_API_CONNECT_OPTIONS, parallelToolCalls, toolChoice, extraKwargs, }: { chatCtx: llm.ChatContext; - toolCtx?: llm.ToolContext; 
+ toolCtx?: llm.ToolCtxInput; connOptions?: APIConnectOptions; parallelToolCalls?: boolean; toolChoice?: llm.ToolChoice; extraKwargs?: Record; }): ResponsesHttpLLMStream { + const toolCtx = llm.toToolContext(toolCtxInput); const modelOptions: Record = { ...(extraKwargs || {}) }; parallelToolCalls = parallelToolCalls !== undefined ? parallelToolCalls : this.#opts.parallelToolCalls; - if (toolCtx && Object.keys(toolCtx).length > 0 && parallelToolCalls !== undefined) { + if ( + toolCtx && + Object.keys(toolCtx.functionTools).length > 0 && + parallelToolCalls !== undefined + ) { modelOptions.parallel_tool_calls = parallelToolCalls; } @@ -182,7 +187,7 @@ class ResponsesHttpLLMStream extends llm.LLMStream { )) as OpenAI.Responses.ResponseInputItem[]; const tools = this.toolCtx - ? Object.entries(this.toolCtx).map(([name, func]) => { + ? Object.entries(this.toolCtx.functionTools).map(([name, func]) => { const oaiParams = { type: 'function' as const, name: name, @@ -417,7 +422,7 @@ export class LLM extends llm.LLM { extraKwargs, }: { chatCtx: llm.ChatContext; - toolCtx?: llm.ToolContext; + toolCtx?: llm.ToolCtxInput; connOptions?: APIConnectOptions; parallelToolCalls?: boolean; toolChoice?: llm.ToolChoice; diff --git a/plugins/openai/src/ws/llm.ts b/plugins/openai/src/ws/llm.ts index 1dd6483c6..d22d7a753 100644 --- a/plugins/openai/src/ws/llm.ts +++ b/plugins/openai/src/ws/llm.ts @@ -231,24 +231,29 @@ export class WSLLM extends llm.LLM { chat({ chatCtx, - toolCtx, + toolCtx: toolCtxInput, connOptions = DEFAULT_API_CONNECT_OPTIONS, parallelToolCalls, toolChoice, extraKwargs, }: { chatCtx: llm.ChatContext; - toolCtx?: llm.ToolContext; + toolCtx?: llm.ToolCtxInput; connOptions?: APIConnectOptions; parallelToolCalls?: boolean; toolChoice?: llm.ToolChoice; extraKwargs?: Record; }): WSLLMStream { + const toolCtx = llm.toToolContext(toolCtxInput); const modelOptions: Record = { ...(extraKwargs ?? {}) }; parallelToolCalls = parallelToolCalls !== undefined ? 
parallelToolCalls : this.#opts.parallelToolCalls; - if (toolCtx && Object.keys(toolCtx).length > 0 && parallelToolCalls !== undefined) { + if ( + toolCtx && + Object.keys(toolCtx.functionTools).length > 0 && + parallelToolCalls !== undefined + ) { modelOptions.parallel_tool_calls = parallelToolCalls; } @@ -425,7 +430,7 @@ export class WSLLMStream extends llm.LLMStream { )) as OpenAI.Responses.ResponseInputItem[]; const tools = this.toolCtx - ? Object.entries(this.toolCtx).map(([name, func]) => { + ? Object.entries(this.toolCtx.functionTools).map(([name, func]) => { const oaiParams = { type: 'function' as const, name, diff --git a/plugins/phonic/src/realtime/realtime_model.ts b/plugins/phonic/src/realtime/realtime_model.ts index 665c5c5ba..09933b580 100644 --- a/plugins/phonic/src/realtime/realtime_model.ts +++ b/plugins/phonic/src/realtime/realtime_model.ts @@ -239,7 +239,7 @@ interface GenerationState { * Realtime session for Phonic (https://docs.phonic.co/) */ export class RealtimeSession extends llm.RealtimeSession { - private _tools: llm.ToolContext = {}; + private _tools: llm.ToolContext = llm.ToolContext.empty(); private _chatCtx = llm.ChatContext.empty(); private options: RealtimeModelOptions; @@ -290,7 +290,7 @@ export class RealtimeSession extends llm.RealtimeSession { } get tools(): llm.ToolContext { - return { ...this._tools }; + return this._tools.copy(); } async updateInstructions(instructions: string): Promise { @@ -367,26 +367,24 @@ export class RealtimeSession extends llm.RealtimeSession { return; } - this._tools = { ...tools }; - this.toolDefinitions = Object.entries(tools) - .filter(([_, tool]) => llm.isFunctionTool(tool)) - .map(([name, tool]) => ({ - type: 'custom_websocket', - tool_schema: { - type: 'function', - function: { - name, - description: tool.description, - parameters: llm.toJsonSchema(tool.parameters), - strict: true, - }, + this._tools = tools.copy(); + this.toolDefinitions = Object.entries(tools.functionTools).map(([name, tool]) => 
({ + type: 'custom_websocket', + tool_schema: { + type: 'function', + function: { + name, + description: tool.description, + parameters: llm.toJsonSchema(tool.parameters), + strict: true, }, - tool_call_output_timeout_ms: TOOL_CALL_OUTPUT_TIMEOUT_MS, - // Tool chaining and tool calls during speech are not supported at this time - // for ease of implementation within the RealtimeSession generations framework - wait_for_speech_before_tool_call: true, - allow_tool_chaining: false, - })); + }, + tool_call_output_timeout_ms: TOOL_CALL_OUTPUT_TIMEOUT_MS, + // Tool chaining and tool calls during speech are not supported at this time + // for ease of implementation within the RealtimeSession generations framework + wait_for_speech_before_tool_call: true, + allow_tool_chaining: false, + })); this.toolsReady.resolve(); } @@ -405,24 +403,22 @@ export class RealtimeSession extends llm.RealtimeSession { this.options.instructions = instructions; } if (tools !== undefined) { - this._tools = { ...tools }; - this.toolDefinitions = Object.entries(tools) - .filter(([, tool]) => llm.isFunctionTool(tool)) - .map(([name, tool]) => ({ - type: 'custom_websocket', - tool_schema: { - type: 'function', - function: { - name, - description: tool.description, - parameters: llm.toJsonSchema(tool.parameters), - strict: true, - }, + this._tools = tools.copy(); + this.toolDefinitions = Object.entries(tools.functionTools).map(([name, tool]) => ({ + type: 'custom_websocket', + tool_schema: { + type: 'function', + function: { + name, + description: tool.description, + parameters: llm.toJsonSchema(tool.parameters), + strict: true, }, - tool_call_output_timeout_ms: TOOL_CALL_OUTPUT_TIMEOUT_MS, - wait_for_speech_before_tool_call: true, - allow_tool_chaining: false, - })); + }, + tool_call_output_timeout_ms: TOOL_CALL_OUTPUT_TIMEOUT_MS, + wait_for_speech_before_tool_call: true, + allow_tool_chaining: false, + })); } if (chatCtx !== undefined) { this._chatCtx = chatCtx.copy(); diff --git 
a/plugins/test/src/llm.ts b/plugins/test/src/llm.ts index 8d85c75c3..534dce2df 100644 --- a/plugins/test/src/llm.ts +++ b/plugins/test/src/llm.ts @@ -5,8 +5,9 @@ import { initializeLogger, llm as llmlib } from '@livekit/agents'; import { describe, expect, it } from 'vitest'; import { z } from 'zod/v4'; -const toolCtx: llmlib.ToolContext = { - getWeather: llmlib.tool({ +const toolCtx = new llmlib.ToolContext([ + llmlib.tool({ + name: 'getWeather', description: 'Get the current weather in a given location', parameters: z.object({ location: z.string().describe('The city and state, e.g. San Francisco, CA'), @@ -14,14 +15,16 @@ const toolCtx: llmlib.ToolContext = { }), execute: async () => {}, }), - playMusic: llmlib.tool({ + llmlib.tool({ + name: 'playMusic', description: 'Play music', parameters: z.object({ name: z.string().describe('The artist and name of the song'), }), execute: async () => {}, }), - toggleLight: llmlib.tool({ + llmlib.tool({ + name: 'toggleLight', description: 'Turn on/off the lights in a room', parameters: z.object({ name: z.string().describe('The room to control'), @@ -31,7 +34,8 @@ const toolCtx: llmlib.ToolContext = { await new Promise((resolve) => setTimeout(resolve, 60_000)); }, }), - selectCurrencies: llmlib.tool({ + llmlib.tool({ + name: 'selectCurrencies', description: 'Currencies of a specific area', parameters: z.object({ currencies: z @@ -40,7 +44,8 @@ const toolCtx: llmlib.ToolContext = { }), execute: async () => {}, }), - updateUserInfo: llmlib.tool({ + llmlib.tool({ + name: 'updateUserInfo', description: 'Update user info.', parameters: z.object({ email: z.string().optional().describe("User's email address"), @@ -49,18 +54,20 @@ const toolCtx: llmlib.ToolContext = { }), execute: async () => {}, }), - simulateFailure: llmlib.tool({ + llmlib.tool({ + name: 'simulateFailure', description: 'Simulate a failure', parameters: z.object({}), execute: async () => { throw new Error('Simulated failure'); }, }), -}; +]); // Tool context for 
strict mode - uses nullable() instead of optional() -const toolCtxStrict: llmlib.ToolContext = { - getWeather: llmlib.tool({ +const toolCtxStrict = new llmlib.ToolContext([ + llmlib.tool({ + name: 'getWeather', description: 'Get the current weather in a given location', parameters: z.object({ location: z.string().describe('The city and state, e.g. San Francisco, CA'), @@ -68,14 +75,16 @@ const toolCtxStrict: llmlib.ToolContext = { }), execute: async () => {}, }), - playMusic: llmlib.tool({ + llmlib.tool({ + name: 'playMusic', description: 'Play music', parameters: z.object({ name: z.string().describe('The artist and name of the song'), }), execute: async () => {}, }), - toggleLight: llmlib.tool({ + llmlib.tool({ + name: 'toggleLight', description: 'Turn on/off the lights in a room', parameters: z.object({ name: z.string().describe('The room to control'), @@ -85,7 +94,8 @@ const toolCtxStrict: llmlib.ToolContext = { await new Promise((resolve) => setTimeout(resolve, 60_000)); }, }), - selectCurrencies: llmlib.tool({ + llmlib.tool({ + name: 'selectCurrencies', description: 'Currencies of a specific area', parameters: z.object({ currencies: z @@ -94,7 +104,8 @@ const toolCtxStrict: llmlib.ToolContext = { }), execute: async () => {}, }), - updateUserInfo: llmlib.tool({ + llmlib.tool({ + name: 'updateUserInfo', description: 'Update user info.', parameters: z.object({ email: z.string().nullable().describe("User's email address"), @@ -103,14 +114,15 @@ const toolCtxStrict: llmlib.ToolContext = { }), execute: async () => {}, }), - simulateFailure: llmlib.tool({ + llmlib.tool({ + name: 'simulateFailure', description: 'Simulate a failure', parameters: z.object({}), execute: async () => { throw new Error('Simulated failure'); }, }), -}; +]); export const llm = async (llm: llmlib.LLM, skipOptionalArgs: boolean) => { initializeLogger({ pretty: false }); @@ -315,7 +327,7 @@ const executeCalls = async (calls: llmlib.FunctionCall[]) => { const results: llmlib.FunctionCallOutput[] 
= []; for (const call of calls) { - const tool = toolCtx[call.name]; + const tool = toolCtx.getFunctionTool(call.name); if (!tool) { throw new Error(`Tool ${call.name} not found`); }