# Voice-to-Voice Real-Time Audio Services
# Usage: docker-compose -f docker-compose.yml -f docker-compose.voice.yml --profile dev up
#
# Architecture:
#   LiveKit SFU   — WebRTC spatial audio mixer for user-to-user + agent voice
#   Turbo Whisper — faster-whisper with streaming WebSocket endpoint (replaces polling)
#   Kokoro TTS    — OpenAI-compatible TTS with per-agent voice presets
#
# Audio format: Opus throughout (48kHz, mono, 64kbps)
services:
  # LiveKit SFU — WebRTC Selective Forwarding Unit for spatial voice chat
  # All user-to-user voice and agent spatial audio routes through here
  livekit:
    image: livekit/livekit-server:v1.7
    container_name: visionflow-livekit
    hostname: livekit
    command: --config /etc/livekit.yaml
    environment:
      - LIVEKIT_API_KEY=${LIVEKIT_API_KEY:-visionflow}
      # NOTE(review): "changeme" is a weak fallback — set LIVEKIT_API_SECRET in the
      # environment/.env for any non-local deployment.
      - LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET:-changeme}
    ports:
      - "7880:7880"                    # HTTP API + WebSocket signaling
      - "7881:7881"                    # RTC over TCP
      - "7882:7882/udp"                # RTC over UDP (primary)
      - "50000-50200:50000-50200/udp"  # WebRTC media ports
    volumes:
      - ./config/livekit.yaml:/etc/livekit.yaml:ro
    networks:
      docker_ragflow:
        aliases:
          - livekit
    healthcheck:
      # NOTE(review): assumes the livekit-server image ships wget — verify, or
      # switch to an HTTP check the image supports.
      test: ["CMD", "wget", "--spider", "-q", "http://localhost:7880"]
      interval: 10s
      timeout: 5s
      retries: 3
      start_period: 5s
    restart: unless-stopped
    profiles:
      - development
      - dev
      - production
      - prod
# Turbo Whisper — faster-whisper with streaming WebSocket STT
# Replaces the polling-based whisper-webui-backend with direct streaming
turbo-whisper:
image: fedirz/faster-whisper-server:latest-cuda
container_name: visionflow-turbo-whisper
hostname: turbo-whisper
environment:
- WHISPER__MODEL=Systran/faster-whisper-large-v3
- WHISPER__DEVICE=cuda
- WHISPER__COMPUTE_TYPE=float16
- WHISPER__LANGUAGE=en
- WHISPER__VAD_FILTER=true
# Streaming mode: returns partial results as they arrive
- WHISPER__BEAM_SIZE=1
- WHISPER__BEST_OF=1
ports:
- "8100:8000" # OpenAI-compatible REST + WebSocket
networks:
docker_ragflow:
aliases:
- turbo-whisper
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
runtime: nvidia
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
interval: 15s
timeout: 5s
retries: 3
start_period: 30s
restart: unless-stopped
profiles:
- development
- dev
- production
- prod
# Kokoro TTS — Text-to-speech with distinct per-agent voice presets
# OpenAI-compatible /v1/audio/speech endpoint, Opus output
kokoro-tts:
image: ghcr.io/remsky/kokoro-fastapi-cpu:latest
container_name: visionflow-kokoro-tts
hostname: kokoro-tts
environment:
- KOKORO_DEFAULT_VOICE=af_heart
- KOKORO_DEFAULT_FORMAT=opus
- KOKORO_SAMPLE_RATE=48000
ports:
- "8880:8880"
networks:
docker_ragflow:
aliases:
- kokoro-tts
- kokoro-tts-container
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8880/health"]
interval: 15s
timeout: 5s
retries: 3
start_period: 20s
restart: unless-stopped
profiles:
- development
- dev
- production
- prod
# Shared external network — must already exist (created by the base stack)
networks:
  docker_ragflow:
    external: true

volumes:
  # Named volume for caching Whisper model downloads; mount it into the STT
  # service so multi-GB weights survive container recreation.
  whisper-models:
    name: visionflow-whisper-models
    driver: local