From 72803aec3f7450f3d745491dd747e1d58ef3235d Mon Sep 17 00:00:00 2001 From: royalfig Date: Fri, 5 Jun 2026 15:49:37 -0400 Subject: [PATCH] Update starter to use new turn detector model --- README.md | 2 +- package.json | 4 +--- src/main.ts | 25 +++++++++---------------- 3 files changed, 11 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 4f828e1..08d33af 100644 --- a/README.md +++ b/README.md @@ -94,7 +94,7 @@ lk app env -w -d .env.local ## Run the agent -Before your first run, you must download certain models such as [Silero VAD](https://docs.livekit.io/agents/logic/turns/vad/) and the [LiveKit turn detector](https://docs.livekit.io/agents/logic/turns/turn-detector/): +The [LiveKit turn detector](https://docs.livekit.io/agents/logic/turns/turn-detector/) and its VAD are now built into the Agents SDK, so no model download is required before your first run. If you add a plugin that ships its own model weights (any `@livekit/agents-plugin-*` package), download them with: ```console pnpm run download-files diff --git a/package.json b/package.json index fd7af93..b2a771c 100644 --- a/package.json +++ b/package.json @@ -34,9 +34,7 @@ "vitest": "^4.1.4" }, "dependencies": { - "@livekit/agents": "^1.4.3", - "@livekit/agents-plugin-livekit": "^1.4.3", - "@livekit/agents-plugin-silero": "^1.4.3", + "@livekit/agents": "^1.5.0", "@livekit/plugins-ai-coustics": "^0.2.14", "dotenv": "^17.4.1", "zod": "^3.25.76" diff --git a/src/main.ts b/src/main.ts index a6df755..4096936 100644 --- a/src/main.ts +++ b/src/main.ts @@ -1,6 +1,4 @@ import { ServerOptions, cli, defineAgent, inference, voice } from '@livekit/agents'; -import * as livekit from '@livekit/agents-plugin-livekit'; -import * as silero from '@livekit/agents-plugin-silero'; import { audioEnhancement } from '@livekit/plugins-ai-coustics'; import dotenv from 'dotenv'; import { fileURLToPath } from 'node:url'; @@ -11,14 +9,7 @@ import { Agent } from './agent'; // when running locally or self-hosting your agent server. dotenv.config({ path: '.env.local' }); -interface ProcessUserData { - vad: silero.VAD; -} - -export default defineAgent({ - prewarm: async (proc) => { - proc.userData.vad = await silero.VAD.load(); - }, +export default defineAgent({ entry: async (ctx) => { // Set up a voice AI pipeline using OpenAI, Cartesia, Deepgram, and the LiveKit turn detector const session = new voice.AgentSession({ @@ -36,13 +27,15 @@ export default defineAgent({ voice: '9626c31c-bec5-4cca-baa8-f8ba9e84c8bc', }), - // VAD and turn detection are used to determine when the user is speaking and when the agent should respond - // See more at https://docs.livekit.io/agents/build/turns - turnDetection: new livekit.turnDetector.MultilingualModel(), - vad: ctx.proc.userData.vad, - voiceOptions: { + // Turn detection determines when the user is speaking and when the agent should respond. + // The LiveKit audio turn detector is a multimodal model that encodes the user's audio + // directly to predict end of turn. It's built into the SDK (no extra plugin) and + // AgentSession supplies the required VAD automatically. + // See more at https://docs.livekit.io/agents/logic/turns/turn-detector/ + turnHandling: { + turnDetection: new inference.AudioTurnDetector(), // Allow the LLM to generate a response while waiting for the end of turn - preemptiveGeneration: true, + preemptiveGeneration: { enabled: true }, }, });