continuum/docker-compose.yml at main · CambrianTech/continuum · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
# Continuum — docker compose up
#
# FIRST-TIME SETUP (fresh clone): populate vendored substrates before build.
#   git submodule update --init --recursive
# The Dockerfiles fail fast with a clear message if you skip this step.
#
# Local:  docker compose up                    (HTTP on localhost, live calls on ws://localhost:7880)
# Grid:   docker compose --profile grid up     (HTTPS via Tailscale, WebRTC over Tailscale mesh)
# GPU:    docker compose --profile gpu up      (adds forge + inference)
# All:    docker compose --profile grid --profile gpu up
#
# Grid mode .env (required):
#   TS_AUTHKEY=tskey-auth-...
#   TS_HOSTNAME=my-machine
#   LIVEKIT_HOST_PORT=0              # frees host port 7880: local livekit is published on an ephemeral host port instead (grid uses livekit-grid)
#   LIVEKIT_URL=ws://tailscale:7890  # continuum-core connects to livekit-grid internally
#   LIVEKIT_BROWSER_URL=ws://tailscale:7880  # node-server returns port 7880 to browser (Tailscale serve TLS proxy)

services:

  # ── PostgreSQL (OPT-IN, grid deployments only) ────────────
  # Default install uses SQLite at ~/.continuum/database/main.db — zero-dep,
  # portable, works offline. Postgres is only needed for multi-writer grid
  # setups where multiple continuum-core nodes share state over Tailscale.
  #
  # Enable with:    docker compose --profile postgres up
  # And tell Rust to use it: DATABASE_URL=postgres://continuum:continuum@postgres:5432/continuum
  #
  # No other service in this file declares `depends_on: postgres`, so when
  # this profile is inactive postgres simply doesn't run. (If a dependency is
  # ever added, mark it `required: false` — compose does not skip it itself.)
  postgres:
    # Opt-in PostgreSQL backend; only started with `--profile postgres`.
    profiles: [postgres]
    image: postgres:17-alpine
    restart: unless-stopped
    mem_limit: 512m
    environment:
      POSTGRES_DB: continuum
      # Role and password default to continuum/continuum and can be overridden
      # from the environment / .env. No host port is published by this service.
      POSTGRES_USER: ${POSTGRES_USER:-continuum}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-continuum}
    volumes:
      # Named volume so the data directory outlives the container.
      - pgdata:/var/lib/postgresql/data
    healthcheck:
      # Probe with the role and database this container was actually configured
      # with, instead of a hard-coded `continuum` role that stops matching as
      # soon as POSTGRES_USER is overridden. `$$` escapes compose interpolation
      # so the variables are expanded by the shell inside the container, where
      # the `environment:` values above are set.
      test: ["CMD-SHELL", 'pg_isready -U "$${POSTGRES_USER}" -d "$${POSTGRES_DB}"']
      interval: 5s
      timeout: 3s
      retries: 5
  # ── Model Init (downloads voice/avatar/scene models on first run) ──
  model-init:
    # Downloader job that fills the shared `voice-models` volume. It declares
    # no restart policy and no ports.
    image: ghcr.io/cambriantech/continuum-model-init:${CONTINUUM_IMAGE_TAG:-latest}
    build:
      context: ./src
      dockerfile: ../docker/model-init.Dockerfile
    # Fixed memory budget for the one-time download; it is not scaled with
    # host RAM. Override with MODEL_INIT_MEM.
    mem_limit: ${MODEL_INIT_MEM:-2g}
    environment:
      # Download target; matches the container side of the volume below.
      MODELS_DIR: /models
      # Passed through from the host environment; empty when unset.
      HF_TOKEN: "${HF_TOKEN:-}"
      # `base` is small (74MB), fast, and works with all whisper.cpp versions.
      # `large-v3-turbo` needs whisper.cpp v1.7+ (different tensor count).
      WHISPER_MODEL: "${WHISPER_MODEL:-base}"
    volumes:
      - voice-models:/models

  # ── Continuum Core (Rust) ─────────────────────────────────
  continuum-core:
    # Rust core service. Starts only after livekit-bridge reports healthy and
    # is itself the health gate for node-server.
    build:
      context: ./src/workers
      dockerfile: ../../docker/continuum-core.Dockerfile
      additional_contexts:
        avatars: ./src/models/avatars
        shared-generated: ./src/shared/generated
      args:
        # --no-default-features excludes livekit-webrtc (handled by livekit-bridge).
        # load-dynamic-ort loads ONNX Runtime as shared lib (runtime discovery).
        GPU_FEATURES: "--no-default-features --features load-dynamic-ort"
    image: ghcr.io/cambriantech/continuum-core:${CONTINUUM_IMAGE_TAG:-latest}
    restart: unless-stopped
    # Sized for mission: Qwen 4-8B Q4 + KV cache for 5 personas + embeddings
    # + Bevy render + vision + audio. Auto-calculated by install.sh from host
    # RAM (host_gb - 10 on ≥32GB, host_gb - 8 on 20-31GB, floor 10g). On Mac
    # this service runs NATIVE (docker-compose.mac.yml sets replicas=0), so
    # this limit is unused there — informational. On Linux (continuum-core-
    # cuda / continuum-core-vulkan overlays) it's the actual ceiling.
    mem_limit: ${CONTINUUM_CORE_MEM:-16g}
    working_dir: /app
    # depends_on does NOT include postgres — postgres is opt-in (profile),
    # and by default continuum-core uses SQLite where no startup ordering
    # matters. When users enable the postgres profile and set DATABASE_URL,
    # Rust's PostgresAdapter (deadpool pool) retries connection on startup.
    depends_on:
      livekit-bridge:
        condition: service_healthy
    volumes:
      # Models downloaded by model-init, mounted read-only here.
      - voice-models:/app/models:ro
      # Mount the ENTIRE ~/.continuum directory R/W. The Rust core reads config,
      # writes model cache, logs, grid state, sockets, sessions — all under
      # ~/.continuum. Cherry-picking subdirs with :ro caused silent failures
      # whenever the core needed to write to a path that wasn't mounted.
      - ~/.continuum:/root/.continuum
    # GPU access for Bevy headless 3D rendering (avatar snapshots, live video).
    # NOTE: the `runtime:` key is Docker-only — Podman (Rancher alt) doesn't
    # know it and errors out. NVIDIA runtime is now set ONLY in the gpu
    # overlay (docker-compose.gpu.yml), not here in the base file, so this
    # base file stays Podman/Rancher-compatible. Default CPU-fallback for
    # Bevy avatar rendering happens automatically when no runtime is set.
    environment:
      # DATABASE_URL is OPTIONAL. Unset → Rust defaults to SQLite at
      # ~/.continuum/database/main.db (mounted from host via the
      # ~/.continuum volume). Set to `postgres://continuum:continuum@postgres:5432/continuum`
      # only when running the `postgres` compose profile. Rust's
      # resolve_handle('main') reads this env to pick the adapter.
      - DATABASE_URL=${DATABASE_URL:-}
      - AVATAR_MODELS_DIR=/app/avatars
      # Defaults to the local `livekit` service; grid mode overrides this via
      # LIVEKIT_URL in .env.
      - LIVEKIT_URL=${LIVEKIT_URL:-ws://livekit:7880}
      - NVIDIA_VISIBLE_DEVICES=${NVIDIA_VISIBLE_DEVICES:-all}
      - NVIDIA_DRIVER_CAPABILITIES=compute,graphics,utility
      # HuggingFace cache inside persistent ~/.continuum so model downloads survive restarts
      - HF_HOME=/root/.continuum/hf_cache
    healthcheck:
      # NOTE(review): this only checks that the socket FILE exists (`test -S`);
      # it does not ping IPC. An earlier comment here described an
      # archive-worker-based ping with a socket-file fallback — that is not
      # what the command below does.
      # Because ~/.continuum is bind-mounted from the host, a socket file left
      # behind by a previous run could presumably satisfy this check before the
      # core is ready — TODO confirm the core recreates it on startup.
      test: ["CMD-SHELL", "test -S /root/.continuum/sockets/continuum-core.sock"]
      interval: 10s
      timeout: 5s
      start_period: 30s
      retries: 3

  # ── LiveKit Bridge (Rust — WebRTC transport adapter) ──────
  # Links webrtc-sys but NOT ort. Separate process eliminates
  # the protobuf symbol conflict that deadlocked continuum-core.
  livekit-bridge:
    image: ghcr.io/cambriantech/continuum-livekit-bridge:${CONTINUUM_IMAGE_TAG:-latest}
    build:
      context: ./src/workers
      dockerfile: ../../docker/livekit-bridge.Dockerfile
    restart: unless-stopped
    # Memory covers WebRTC encode/decode buffers and multiple streams.
    # install.sh writes LIVEKIT_BRIDGE_MEM = max(2, host_gb/8); the 2g default
    # applies to anyone running `docker compose` by hand.
    mem_limit: ${LIVEKIT_BRIDGE_MEM:-2g}
    depends_on:
      - livekit
    environment:
      # Same LiveKit endpoint variable and default that continuum-core uses.
      LIVEKIT_URL: "${LIVEKIT_URL:-ws://livekit:7880}"
    volumes:
      # Only the sockets directory is shared with the host.
      - ~/.continuum/sockets:/root/.continuum/sockets
    healthcheck:
      # Healthy once the bridge's socket file exists. continuum-core waits on
      # this result (`condition: service_healthy`).
      test:
        - CMD
        - test
        - "-S"
        - /root/.continuum/sockets/livekit-bridge.sock
      interval: 5s
      timeout: 3s
      retries: 5

  # ── Node Server (TypeScript) ──────────────────────────────
  node-server:
    image: ghcr.io/cambriantech/continuum-node:${CONTINUUM_IMAGE_TAG:-latest}
    build:
      context: ./src
      dockerfile: ../docker/node-server.Dockerfile
    restart: unless-stopped
    # Memory covers the TS orchestrator, IPC buffers and RAG state.
    # install.sh writes NODE_SERVER_MEM = max(2, host_gb/8); the 2g default is
    # for manual `docker compose` users.
    mem_limit: ${NODE_SERVER_MEM:-2g}
    # postgres is deliberately absent: it lives behind the opt-in `postgres`
    # profile, and node-server never talks to it directly — every data
    # operation goes through continuum-core over IPC using opaque handles.
    depends_on:
      continuum-core:
        condition: service_healthy
    ports:
      # WebSocket endpoint; the host-side port is overridable via NODE_WS_PORT.
      - "${NODE_WS_PORT:-9001}:9001"
    volumes:
      - ~/.continuum:/root/.continuum
    environment:
      # DATABASE_URL is intentionally NOT set. node-server addresses data via
      # opaque handles ('main' for the primary DB, persona UUIDs for the
      # per-persona DBs) that Rust resolves to concrete backends. The old
      # value leaked across the TS→Rust IPC boundary as a connection string
      # and broke Mac Option B, where docker-DNS names don't resolve from the
      # native host.
      NODE_ENV: production
      JTAG_SKIP_HTTP: "1"
      JTAG_NO_TLS: "1"
      # Sourced from LIVEKIT_BROWSER_URL, not LIVEKIT_URL.
      LIVEKIT_URL: "${LIVEKIT_BROWSER_URL:-ws://livekit:7880}"

  # ── Widget Server (Vite) ──────────────────────────────────
  widget-server:
    image: ghcr.io/cambriantech/continuum-widgets:${CONTINUUM_IMAGE_TAG:-latest}
    build:
      context: ./src
      dockerfile: ../docker/widget-server.Dockerfile
    restart: unless-stopped
    mem_limit: 512m
    depends_on:
      - node-server
    ports:
      # HTTP
      - "9003:9003"
    volumes:
      # Single-file, read-only bind mount of the host config.
      # NOTE(review): if the file is missing on the host, Docker typically
      # creates a directory at that path instead — confirm install.sh always
      # writes it first.
      - ~/.continuum/config.env:/root/.continuum/config.env:ro
    environment:
      JTAG_HTTP_PORT: "9003"
      # Follows the host-side WebSocket port published by node-server.
      JTAG_WEBSOCKET_PORT: "${NODE_WS_PORT:-9001}"
      # WS proxy: widget-server forwards the browser WebSocket to the
      # node-server container on its in-container port.
      JTAG_WS_PROXY_HOST: node-server
      JTAG_WS_PROXY_PORT: "9001"

  # ── LiveKit (WebRTC) — local mode ───────────────────────────
  # Dev server for local development. Always starts.
  # In grid mode, set LIVEKIT_HOST_PORT=0 in .env to avoid port conflict with tailscale.
  # (LiveKit still runs, with signalling published on an ephemeral host port
  # instead of 7880 — harmless, ~50MB RAM.)
  livekit:
    # Local-mode LiveKit server. It has no profile, so it starts on every
    # `docker compose up`.
    image: livekit/livekit-server:latest
    restart: unless-stopped
    mem_limit: 256m
    command:
      - "--dev"
      - "--bind"
      - "0.0.0.0"
      - "--node-ip"
      - "127.0.0.1"
    ports:
      # Host-side ports are individually overridable; container ports are fixed.
      # WebSocket signalling
      - "${LIVEKIT_HOST_PORT:-7880}:7880"
      # WebRTC TCP
      - "${LIVEKIT_HOST_RTC_TCP:-7881}:7881"
      # WebRTC UDP
      - "${LIVEKIT_HOST_RTC_UDP:-7882}:7882/udp"

  # ── LiveKit (WebRTC) — grid mode ──────────────────────────
  # Shares Tailscale's network namespace so WebRTC media ports (7881/7882)
  # are directly on the Tailscale interface — real UDP, no relay, no TURN.
  # Signaling proxied by Tailscale serve: wss://7880 → localhost:7890.
  livekit-grid:
    # Grid-mode LiveKit server; only started with `--profile grid`.
    profiles:
      - grid
    image: livekit/livekit-server:latest
    restart: unless-stopped
    mem_limit: 256m
    # Runs inside the tailscale container's network namespace, so this service
    # publishes no ports of its own.
    network_mode: "service:tailscale"
    depends_on:
      - tailscale
    volumes:
      # Config and entrypoint script come from the repo, mounted read-only.
      - ./docker/livekit.yaml:/etc/livekit.yaml:ro
      - ./docker/livekit-entrypoint.sh:/entrypoint.sh:ro
    entrypoint:
      - /entrypoint.sh

  # ── Tailscale (grid profile — remote HTTPS access) ────────
  # Joins tailnet, provisions real Let's Encrypt certs, reverse-proxies
  # HTTPS/WSS to plain HTTP containers. Only needed for remote access.
  #
  # TS_AUTHKEY from ~/.continuum/config.env (same as all other API keys).
  # Once registered, state volume persists identity — key only needed for first run.
  tailscale:
    # Tailnet sidecar for the grid profile. livekit-grid shares this
    # container's network namespace (`network_mode: service:tailscale`).
    image: tailscale/tailscale:latest
    # TS_HOSTNAME wins when set; otherwise "<host>-grid". HOSTNAME is often not
    # exported to compose (bash sets it but does not export it by default), in
    # which case the previous `${HOSTNAME}-grid` collapsed to "-grid". The
    # nested default keeps the result a valid name ("continuum-grid").
    hostname: ${TS_HOSTNAME:-${HOSTNAME:-continuum}-grid}
    restart: unless-stopped
    profiles: ["grid"]
    environment:
      # Empty when unset.
      - TS_AUTHKEY=${TS_AUTHKEY:-}
      # State lives on the named volume mounted below.
      - TS_STATE_DIR=/var/lib/tailscale
      - TS_SERVE_CONFIG=/config/tailscale-serve.json
      # Kernel networking rather than userspace mode; pairs with the
      # /dev/net/tun device and net_admin/net_raw capabilities below.
      - TS_USERSPACE=false
    volumes:
      - tailscale-state:/var/lib/tailscale
      - ./docker/tailscale-serve.json:/config/tailscale-serve.json:ro
    devices:
      - /dev/net/tun:/dev/net/tun
    cap_add:
      - net_admin
      - net_raw
    ports:
      - "443:443"      # HTTPS → widget-server (Tailscale serve proxies to widget-server:9003)
      # LiveKit 7880 NOT mapped here — local livekit already binds it on the host.
      # livekit-grid is in Tailscale's network namespace, reachable via Tailscale IP directly.

  # ── Forge Worker (sentinel-ai) ────────────────────────────
  forge-worker:
    # GPU-profile service; only started with `--profile gpu`.
    profiles:
      - gpu
    # Built from the sibling checkout ../sentinel-ai when building locally.
    build:
      context: ../sentinel-ai
    # The image is published by the sentinel-ai repo
    # (https://github.com/CambrianTech/sentinel-ai) on its own release cadence,
    # independent of continuum's PR cycle. It is never tagged with continuum's
    # :pr-<N> or :<sha>; its tags are :latest plus commit SHAs of sentinel-ai
    # pushes. Tying it to CONTINUUM_IMAGE_TAG made
    # `docker compose --profile gpu pull` fail with "manifest unknown" on a
    # continuum PR tag (caught 2026-04-23 during PR #950 Carl-GPU testing), so
    # it has its own FORGE_WORKER_IMAGE_TAG (default :latest) and the two
    # repos stay independently versioned.
    image: ghcr.io/cambriantech/forge-worker:${FORGE_WORKER_IMAGE_TAG:-latest}
    # The memory ceiling is declared twice (`mem_limit` and
    # `deploy.resources.limits`); keep both values identical when changing it.
    mem_limit: 28g
    deploy:
      resources:
        limits:
          memory: 28g
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities:
                - gpu
    environment:
      # NOTE(review): all GPUs are reserved above, but only device 0 is made
      # visible here — confirm that is intended.
      CUDA_VISIBLE_DEVICES: "0"
      HF_TOKEN: "${HF_TOKEN:-}"
      PYTHONUNBUFFERED: "1"
    volumes:
      - forge-output:/app/output
      - hf-cache:/root/.cache/huggingface
      - models:/app/models
      # Example alloys from the sibling forge-alloy checkout, read-only.
      - ../forge-alloy/examples:/app/alloys:ro

  # ── Inference Server (GPU nodes only) ──────────────────────
  inference:
    # GPU-profile service; only started with `--profile gpu`.
    profiles:
      - gpu
    # The image is pinned by digest on purpose. The floating `:server-cuda`
    # tag is rebuilt by ggml-org on every merge to llama.cpp main, so an
    # unpinned pull on the wrong day would silently break every install with
    # nothing pointing at the cause and no way to reproduce it. Updating is a
    # deliberate step: smoke-test the new upstream build for behavior parity,
    # then bump the digest in a follow-up PR. Issue #955.
    image: ghcr.io/ggml-org/llama.cpp:server-cuda@sha256:11b71618f3f4b9c98e42818c058e37b62478f474806b4107ab698abd0be900f6
    restart: unless-stopped
    mem_limit: 8g
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities:
                - gpu
    volumes:
      # Same `models` volume forge-worker mounts at /app/models.
      - models:/models
      # forge-worker output, read-only here.
      - forge-output:/app/output:ro
    # Loads /models/current.gguf and listens on 0.0.0.0:8090. No host port is
    # published by this service.
    command:
      - "-m"
      - /models/current.gguf
      - "-c"
      - "4096"
      - "-ngl"
      - "99"
      - "--port"
      - "8090"
      - "--host"
      - "0.0.0.0"

volumes:
  # PostgreSQL data directory (postgres service).
  pgdata: {}
  # HuggingFace cache for forge-worker.
  hf-cache: {}
  # Written by forge-worker, mounted read-only into inference.
  forge-output: {}
  # Shared by forge-worker (/app/models) and inference (/models).
  models: {}
  # Filled by model-init, mounted read-only into continuum-core.
  voice-models: {}
  # Tailscale state directory (tailscale service).
  tailscale-state: {}