|
| 1 | +""" |
| 2 | +Local Transport Example |
| 3 | +
|
| 4 | +Demonstrates using LocalEdge (the local transport) for local audio/video I/O with vision agents. |
| 5 | +This enables running agents using your microphone, speakers, and camera without |
| 6 | +cloud-based edge infrastructure. |
| 7 | +
|
| 8 | +Usage: |
| 9 | + uv run python local_transport_example.py run |
| 10 | +
|
| 11 | +Requirements: |
| 12 | + - Working microphone and speakers |
| 13 | + - Optional: Camera for video input |
| 14 | + - API keys for Gemini and Deepgram in .env file |
| 15 | +""" |
| 16 | + |
| 17 | +import logging |
| 18 | +from typing import Any |
| 19 | + |
| 20 | +from dotenv import load_dotenv |
| 21 | +from vision_agents.core import Agent, AgentLauncher, Runner, User |
| 22 | +from vision_agents.core.utils.examples import get_weather_by_location |
| 23 | +from vision_agents.plugins import deepgram, gemini |
| 24 | +from vision_agents.plugins.local import LocalEdge |
| 25 | +from vision_agents.plugins.local.devices import ( |
| 26 | + select_audio_input_device, |
| 27 | + select_audio_output_device, |
| 28 | + select_video_device, |
| 29 | +) |
| 30 | + |
# Load variables from a local .env file into the process environment.
load_dotenv()

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
| 34 | + |
# System prompt handed to the agent: short, conversational, plain-text replies.
INSTRUCTIONS = " ".join(
    [
        "You're a helpful voice AI assistant running on the user's local machine.",
        "Keep responses short and conversational.",
        "Don't use special characters or formatting.",
        "Be friendly and helpful.",
    ]
)
| 40 | + |
| 41 | + |
def setup_llm(model: str = "gemini-3.1-flash-lite-preview") -> gemini.LLM:
    """Build the Gemini LLM and register a weather lookup function on it.

    Args:
        model: Model identifier passed to ``gemini.LLM``.

    Returns:
        The LLM instance with ``get_weather`` registered via
        ``register_function``.
    """
    gemini_llm = gemini.LLM(model)
    register_weather = gemini_llm.register_function(
        description="Get current weather for a location"
    )

    async def get_weather(location: str) -> dict[str, Any]:
        weather = await get_weather_by_location(location)
        return weather

    # Same effect as decorating get_weather with @register_function(...).
    register_weather(get_weather)
    return gemini_llm
| 50 | + |
| 51 | + |
# Sentinel distinguishing "global never assigned" from "global assigned None".
_DEVICE_UNSET = object()


def _resolve_device(global_name: str, selector: Any) -> Any:
    """Return the device stored in module global *global_name*, or select one.

    The ``__main__`` block of this script stores the chosen devices as module
    globals. When the module is imported rather than run as a script, those
    globals do not exist; in that case *selector* is called to pick a device.
    A global explicitly set to ``None`` is returned unchanged.
    """
    device = globals().get(global_name, _DEVICE_UNSET)
    if device is _DEVICE_UNSET:
        return selector()
    return device


async def create_agent() -> Agent:
    """Create the voice agent wired to the local audio/video devices.

    Devices come from the module globals ``input_device``, ``output_device``
    and ``video_device`` when they have been set (as the ``__main__`` block
    does); otherwise they are selected here, so the function no longer fails
    with ``NameError`` when the module is imported.

    Returns:
        An ``Agent`` using a ``LocalEdge`` transport, the Gemini LLM from
        ``setup_llm`` and Deepgram TTS/STT.

    Raises:
        RuntimeError: If no audio input or no audio output device is available.
    """
    llm = setup_llm()

    input_device = _resolve_device("input_device", select_audio_input_device)
    output_device = _resolve_device("output_device", select_audio_output_device)
    video_device = _resolve_device("video_device", select_video_device)

    # Audio in and out are mandatory; video is optional.
    if input_device is None:
        raise RuntimeError("No audio input device available")
    if output_device is None:
        raise RuntimeError("No audio output device available")

    logger.info("Using input: %s (%sHz)", input_device.name, input_device.sample_rate)
    logger.info("Using output: %s (%sHz)", output_device.name, output_device.sample_rate)
    if video_device:
        logger.info("Using video device: %s", video_device.name)

    transport = LocalEdge(
        audio_input=input_device,
        audio_output=output_device,
        video_input=video_device,
    )

    return Agent(
        edge=transport,
        agent_user=User(name="Local AI Assistant", id="local-agent"),
        instructions=INSTRUCTIONS,
        processors=[],
        llm=llm,
        tts=deepgram.TTS(),
        stt=deepgram.STT(eager_turn_detection=True),
    )
| 82 | + |
| 83 | + |
async def join_call(agent: Agent, call_type: str, call_id: str, **kwargs: Any) -> None:
    """Create a call on the agent's edge, join it, greet the user, then finish.

    Args:
        agent: Agent whose ``edge`` creates the call and which joins it.
        call_type: Unused in this function.
        call_id: Identifier passed to ``agent.edge.create_call``.
        **kwargs: Additional keyword arguments; unused in this function.
    """
    local_call = await agent.edge.create_call(call_id)
    session = agent.join(call=local_call, participant_wait_timeout=0)
    async with session:
        greeting_prompt = "Greet the user briefly"
        await agent.simple_response(greeting_prompt)
        await agent.finish()
| 89 | + |
| 90 | + |
if __name__ == "__main__":
    # Startup banner.
    rule = "=" * 60
    print(f"\n{rule}")
    print("Local Transport Voice Agent")
    print(rule)
    print("\nThis agent uses your local microphone, speakers, and optionally camera.")

    # Kept as module-level globals: create_agent() looks these names up.
    input_device, output_device, video_device = (
        select_audio_input_device(),
        select_audio_output_device(),
        select_video_device(),
    )

    usage_lines = ["Speak into your microphone to interact with the AI."]
    if video_device:
        usage_lines.append("Camera is enabled for video input.")
    usage_lines.append("Press Ctrl+C to stop.\n")
    print("\n".join(usage_lines))

    launcher = AgentLauncher(create_agent=create_agent, join_call=join_call)
    Runner(launcher).cli()
0 commit comments