-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathcompose.yml
More file actions
70 lines (69 loc) · 1.97 KB
/
compose.yml
File metadata and controls
70 lines (69 loc) · 1.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# Compose stack with four services: the vLLM model server, spectrum_server,
# the agent (started only after the two former report healthy), and MongoDB.
services:
  # vLLM server. Serving options are read from the config file that lives in
  # the model directory bind-mounted from the host (see `volumes` below).
  vllm:
    image: vllm/vllm-openai:latest
    container_name: vllm
    command: >
      serve --config /models/gpt-oss-20b/vllm_config.yaml
    runtime: nvidia  # required for GPU support
    environment:
      VLLM_ATTENTION_BACKEND: "TRITON_ATTN"
      # All tiktoken-related variables point at the same directory inside the
      # mounted model folder (presumably so encodings load from disk rather
      # than being downloaded -- confirm).
      TIKTOKEN_RS_CACHE_DIR: "/models/gpt-oss-20b/TIKTOKEN/"
      TIKTOKEN_CACHE_DIR: "/models/gpt-oss-20b/TIKTOKEN/"
      TIKTOKEN_ENCODINGS_BASE: "/models/gpt-oss-20b/TIKTOKEN/"
    ports:
      - "8888:8888"
    volumes:
      - "${HOME}/gpt-oss-20b:/models/gpt-oss-20b"
    ipc: host
    deploy:
      resources:
        reservations:
          devices:
            # GPU reservation; no `count` or `device_ids` is specified here.
            - driver: nvidia
              capabilities: [gpu]
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8888/metrics"]
      interval: 10s  # probe every 10 seconds
      timeout: 5s  # give each probe 5 s to answer
      retries: 6  # 6 consecutive failures (~6 x 10 s = 60 s) => unhealthy
      start_period: 60s  # give the container a little warm-up time

  # spectrum_server, built from ./spectrum_server and run as a Python module.
  spectrum_server:
    build:
      context: spectrum_server
      dockerfile: Dockerfile
    image: spectrum_server:latest
    container_name: spectrum_server
    command: ["python", "-m", "spectrum_server"]
    env_file:
      - env.example
    ports:
      - "8000:8000"
    # Same probe cadence as the vllm service, against this service's /health.
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 10s
      timeout: 5s
      retries: 6
      start_period: 60s

  # agent, built from ./agent; `--server` is passed as the container command.
  agent:
    build:
      context: agent
      dockerfile: Dockerfile
    image: agent:latest
    container_name: agent
    command: ["--server"]
    ports:
      - "8001:8001"
    env_file:
      - env.example
    volumes:
      # Host CA bundle mounted read-only over the container's bundle.
      - /etc/ssl/certs/ca-certificates.crt:/etc/ssl/certs/ca-certificates.crt:ro
    # Start only once both upstream services pass their healthchecks.
    depends_on:
      vllm:
        condition: service_healthy
      spectrum_server:
        condition: service_healthy

  # MongoDB listening on 27018 (set via the mongod command line), published
  # on the same host port.
  mongodb:
    image: mongo:8.0
    ports:
      - "27018:27018"
    command: mongod --port 27018