Commit a41681c

Local audio/video transport implementation

1 parent 837f64c · commit a41681c
21 files changed · 4309 additions & 2285 deletions

.github/workflows/run_tests.yml

Lines changed: 2 additions & 0 deletions

```diff
@@ -84,6 +84,8 @@ jobs:
           sudo rm -rf "/usr/local/lib/android" || true
           sudo rm -rf "/usr/local/share/boost" || true
           sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
+      - name: Install system dependencies
+        run: sudo apt-get update && sudo apt-get install -y libportaudio2
       - name: Install dependencies
         uses: ./.github/actions/python-uv-setup
       - name: Run core tests
```
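The new CI step installs `libportaudio2` because the `sounddevice` package pulled in by the local plugin is a PortAudio binding and fails at import time when the shared library is missing. A quick way to check for the library on a dev machine (a stdlib-only sketch, independent of the plugin itself):

```python
import ctypes.util

# Look up the PortAudio shared library by its conventional name.
lib = ctypes.util.find_library("portaudio")
if lib is None:
    print("PortAudio not found; install it, e.g. sudo apt-get install -y libportaudio2")
else:
    print(f"PortAudio available as: {lib}")
```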

agents-core/pyproject.toml

Lines changed: 1 addition & 0 deletions

```diff
@@ -73,6 +73,7 @@ turbopuffer = ["vision-agents-plugins-turbopuffer"]
 mistral = ["vision-agents-plugins-mistral"]
 assemblyai = ["vision-agents-plugins-assemblyai"]
 redis = ["redis[hiredis]>=5.0.0"]
+local = ["vision-agents-plugins-local"]

 all-plugins = [
     "vision-agents-plugins-anthropic",
```
Lines changed: 127 additions & 0 deletions

@@ -0,0 +1,127 @@

# Local Transport Example

This example demonstrates how to run a vision agent using local audio/video I/O (microphone, speakers, and camera) instead of a cloud-based edge network.

## Overview

The LocalEdge provides:

- **Microphone input**: Captures audio from your microphone
- **Speaker output**: Plays AI responses on your speakers
- **Camera input**: Captures video from your camera (optional)
- **No cloud media infrastructure**: Audio and video are handled locally (the LLM, STT, and TTS services are still cloud-based)
## Examples

There are two example scripts:

### 1. Basic Voice Agent (`local_transport_example.py`)

Uses the Gemini LLM with Deepgram STT and ElevenLabs TTS for a voice-only experience.

```bash
uv run python local_transport_example.py
```

### 2. Vision Agent with Gemini Realtime (`local_transport_realtime_example.py`)

Uses Gemini Realtime for native audio/video understanding. This lets Gemini see through your camera!

```bash
uv run python local_transport_realtime_example.py
```

Try asking: "What do you see?" or "Describe what's in front of you"
## Prerequisites

1. A working microphone and speakers
2. A camera (optional for the basic example, recommended for the realtime example)
3. API keys:

### For the basic example:

- Google AI (for the Gemini LLM)
- Deepgram (for STT)
- ElevenLabs (for TTS)

### For the realtime example:

- Google AI (for Gemini Realtime), which handles audio and video natively
## Setup

1. Create a `.env` file with your API keys:

```bash
GOOGLE_API_KEY=your_google_api_key
DEEPGRAM_API_KEY=your_deepgram_api_key
ELEVENLABS_API_KEY=your_elevenlabs_api_key
```

2. Install dependencies:

```bash
cd examples/10_local_transport_example
uv sync
```
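The `.env` file above is plain `KEY=value` lines; the examples load it with `python-dotenv`'s `load_dotenv()`. As a rough illustration of what that loading does (a minimal stdlib sketch, not the library's actual implementation):

```python
import os

def load_env_file(path: str) -> None:
    """Minimal sketch of .env loading: put KEY=value lines into os.environ."""
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue  # skip blanks, comments, and malformed lines
            key, _, value = line.partition("=")
            # setdefault mirrors dotenv's default of not overriding existing vars
            os.environ.setdefault(key.strip(), value.strip())
```

In practice, just call `load_dotenv()` as the example scripts do; this sketch only shows the shape of the file format.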
## Device Selection

Both examples will prompt you to select:

1. **Input device** (microphone)
2. **Output device** (speakers)
3. **Video device** (camera) - can be skipped by entering 'n'

Press Enter to use the default device, or enter a number to select a specific device.

Press `Ctrl+C` to stop the agent.
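The prompt behavior described above (Enter for the default, a number for a specific device, 'n' to skip) can be sketched as a small pure function. The function name and the use of plain device-name strings here are hypothetical stand-ins, not the plugin's actual API:

```python
from typing import Optional, Sequence

def resolve_choice(devices: Sequence[str], entry: str,
                   default_index: int = 0) -> Optional[str]:
    """Map a prompt entry to a device: '' -> default, digits -> index, 'n' -> skip."""
    entry = entry.strip().lower()
    if entry == "n":
        return None  # user skipped (e.g. no camera)
    if entry == "":
        return devices[default_index]
    return devices[int(entry)]
```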
## Listing Audio Devices

To see available audio devices on your system:

```python
from vision_agents.plugins.local.devices import list_audio_input_devices, list_audio_output_devices

list_audio_input_devices()
list_audio_output_devices()
```
## Configuration

You can customize the audio settings when creating the LocalEdge:

```python
from vision_agents.plugins.local import LocalEdge
from vision_agents.plugins.local.devices import (
    select_audio_input_device,
    select_audio_output_device,
)

input_device = select_audio_input_device()
output_device = select_audio_output_device()

edge = LocalEdge(
    audio_input=input_device,  # AudioInputDevice (microphone)
    audio_output=output_device,  # AudioOutputDevice (speakers)
)
```
## Troubleshooting

### No audio input/output

1. Check that your microphone and speakers are properly connected
2. Run `list_audio_input_devices()` or `list_audio_output_devices()` to see the available devices
3. Try specifying explicit device indices in the LocalEdge constructor

### Audio quality issues

- Try increasing the `blocksize` parameter for smoother audio
- Ensure your microphone isn't picking up too much background noise

### Permission errors

On macOS, you may need to grant microphone permissions to your terminal application.
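A larger `blocksize` smooths playback at the cost of latency: each block of audio frames spans `blocksize / sample_rate` seconds, so bigger blocks tolerate scheduling hiccups but delay the audio more. A quick way to see the trade-off (plain arithmetic, independent of the plugin):

```python
def block_latency_ms(blocksize: int, sample_rate: int) -> float:
    """Duration of one audio block in milliseconds."""
    return blocksize / sample_rate * 1000.0

# At 48 kHz: 256 frames is ~5 ms, 1024 is ~21 ms, 4096 is ~85 ms per block.
for blocksize in (256, 1024, 4096):
    print(f"{blocksize} frames @ 48000 Hz -> {block_latency_ms(blocksize, 48000):.1f} ms")
```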
Lines changed: 1 addition & 0 deletions

```diff
@@ -0,0 +1 @@
+# Local Transport Example
```
Lines changed: 106 additions & 0 deletions

@@ -0,0 +1,106 @@

```python
"""
Local Transport Example

Demonstrates using LocalEdge for local audio/video I/O with vision agents.
This enables running agents using your microphone, speakers, and camera without
cloud-based edge infrastructure.

Usage:
    uv run python local_transport_example.py run

Requirements:
    - Working microphone and speakers
    - Optional: Camera for video input
    - API keys for Gemini, Deepgram, and ElevenLabs in .env file
"""

import logging
from typing import Any

from dotenv import load_dotenv
from vision_agents.core import Agent, AgentLauncher, Runner, User
from vision_agents.core.utils.examples import get_weather_by_location
from vision_agents.plugins import deepgram, gemini
from vision_agents.plugins.local import LocalEdge
from vision_agents.plugins.local.devices import (
    select_audio_input_device,
    select_audio_output_device,
    select_video_device,
)

logger = logging.getLogger(__name__)

load_dotenv()

INSTRUCTIONS = (
    "You're a helpful voice AI assistant running on the user's local machine. "
    "Keep responses short and conversational. Don't use special characters or "
    "formatting. Be friendly and helpful."
)


def setup_llm(model: str = "gemini-3.1-flash-lite-preview") -> gemini.LLM:
    llm = gemini.LLM(model)

    @llm.register_function(description="Get current weather for a location")
    async def get_weather(location: str) -> dict[str, Any]:
        return await get_weather_by_location(location)

    return llm


async def create_agent() -> Agent:
    llm = setup_llm()

    # input_device/output_device/video_device are module-level globals,
    # set by the device-selection prompts in the __main__ block below.
    if input_device is None:
        raise RuntimeError("No audio input device available")
    if output_device is None:
        raise RuntimeError("No audio output device available")

    logger.info(f"Using input: {input_device.name} ({input_device.sample_rate}Hz)")
    logger.info(f"Using output: {output_device.name} ({output_device.sample_rate}Hz)")
    if video_device:
        logger.info(f"Using video device: {video_device.name}")

    transport = LocalEdge(
        audio_input=input_device,
        audio_output=output_device,
        video_input=video_device,
    )

    agent = Agent(
        edge=transport,
        agent_user=User(name="Local AI Assistant", id="local-agent"),
        instructions=INSTRUCTIONS,
        processors=[],
        llm=llm,
        tts=deepgram.TTS(),
        stt=deepgram.STT(eager_turn_detection=True),
    )

    return agent


async def join_call(agent: Agent, call_type: str, call_id: str, **kwargs: Any) -> None:
    call = await agent.edge.create_call(call_id)
    async with agent.join(call=call, participant_wait_timeout=0):
        await agent.simple_response("Greet the user briefly")
        await agent.finish()


if __name__ == "__main__":
    print("\n" + "=" * 60)
    print("Local Transport Voice Agent")
    print("=" * 60)
    print("\nThis agent uses your local microphone, speakers, and optionally camera.")

    input_device = select_audio_input_device()
    output_device = select_audio_output_device()
    video_device = select_video_device()

    print("Speak into your microphone to interact with the AI.")
    if video_device:
        print("Camera is enabled for video input.")
    print("Press Ctrl+C to stop.\n")

    Runner(AgentLauncher(create_agent=create_agent, join_call=join_call)).cli()
```
Lines changed: 20 additions & 0 deletions

@@ -0,0 +1,20 @@

```toml
[project]
name = "local-transport-example"
version = "0.0.0"
requires-python = ">=3.10"

# Dependencies for local audio transport
dependencies = [
    "python-dotenv>=1.0",
    "vision-agents-plugins-deepgram",
    "vision-agents-plugins-elevenlabs",
    "vision-agents-plugins-gemini",
    "vision-agents-plugins-local",
]

[tool.uv.sources]
"vision-agents-plugins-deepgram" = { path = "../../plugins/deepgram", editable = true }
"vision-agents-plugins-elevenlabs" = { path = "../../plugins/elevenlabs", editable = true }
"vision-agents-plugins-gemini" = { path = "../../plugins/gemini", editable = true }
"vision-agents-plugins-local" = { path = "../../plugins/local", editable = true }
"vision-agents" = { path = "../../agents-core", editable = true }
```

plugins/local/README.md

Whitespace-only changes.

plugins/local/py.typed

Whitespace-only changes.

plugins/local/pyproject.toml

Lines changed: 42 additions & 0 deletions

@@ -0,0 +1,42 @@

```toml
[build-system]
requires = ["hatchling", "hatch-vcs"]
build-backend = "hatchling.build"

[project]
name = "vision-agents-plugins-local"
dynamic = ["version"]
description = "Local audio & video integration for Vision Agents"
readme = "README.md"
keywords = ["local", "AI", "voice agents", "agents"]
requires-python = ">=3.10"
license = "MIT"
dependencies = [
    "vision-agents",
    "sounddevice>=0.5.0",
    "aiortc>=1.14.0, <1.15.0",
    "av>=14.2.0, <17",
]

[project.urls]
Documentation = "https://visionagents.ai/"
Website = "https://visionagents.ai/"
Source = "https://github.com/GetStream/Vision-Agents"

[tool.hatch.version]
source = "vcs"
raw-options = { root = "..", search_parent_directories = true, fallback_version = "0.0.0" }

[tool.hatch.build.targets.wheel]
packages = ["."]

[tool.hatch.build.targets.sdist]
include = ["/vision_agents"]

[tool.uv.sources]
vision-agents = { workspace = true }

[dependency-groups]
dev = [
    "pytest>=8.4.1",
    "pytest-asyncio>=1.0.0",
]
```

plugins/local/tests/__init__.py

Whitespace-only changes.
