docker-compose.yml
version: '2'
services:
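  # Descriptive comment (added): presumably the Python backend, built from
  # Dockerfile_pythonsimo; port 8888 is exposed to linked containers only and
  # is not published on the host.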
  pythonsimo:
    build:
      context: .
      dockerfile: Dockerfile_pythonsimo
    expose:
      - "8888"
    links:
      - redis
    restart: always
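  # Descriptive comment (added): the main app; links to redis, pythonsimo,
  # influxdb and llama, mounts ./simojs-data and ./templates, and publishes
  # 9229 (the Node.js inspector port, presumably for debugging) on localhost only.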
  simojs:
    links:
      - redis
      - pythonsimo
      - influxdb
      - llama
    build: .
    extra_hosts:
      - "host.docker.internal:host-gateway"
    volumes:
      - "./simojs-data/:/simojs-data"
      - "./templates/:/templates"
    restart: always
    ports:
      - "127.0.0.1:9229:9229"
  llama:
    #image: ghcr.io/ggerganov/llama.cpp:server
    image: ghcr.io/ggml-org/llama.cpp:server # new address
    #command: -m /models/llamafi-combo-1k-Q5_1.gguf -c 128 --host 0.0.0.0 --port 8111 --threads 8 --mlock
    #command: -m /models/viking-13b-q6_k.gguf -c 128 --host 0.0.0.0 --port 8111 --threads 8 --mlock
    command: -m /models/viking-13b-q5_k_m.gguf -c 128 --host 0.0.0.0 --port 8111 --threads 8 --mlock
    #command: -m /models/finnish-llama2-v0.2-Q5_1.gguf -c 128 --host 0.0.0.0 --port 8111 --threads 8 --mlock
    volumes:
      - "./models:/models"
    ports:
      - "8111:8111"
    restart: always
    ulimits:
      memlock:
        soft: -1 # Allow unlimited locked memory (or a specific large value)
        hard: -1 # Allow unlimited locked memory (or a specific large value)
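  # Descriptive comment (added): Redis built from a custom Dockerfile;
  # ./redis-data is mounted for persistence (assuming the image writes its
  # dump/AOF files there).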
  redis:
    restart: always
    build:
      context: .
      dockerfile: Dockerfile_redis
    volumes:
      - "./redis-data/:/redis-data"
  influxdb:
    image: influxdb:latest
    container_name: influxdb
    volumes:
      - "./influxdb_data:/var/lib/influxdb"
# GPU llama
#  llama: # Service name acts as hostname within the network
#    image: ghcr.io/ggerganov/llama.cpp:server-cuda
#    command: -m /models/llamafi-combo-1k-Q5_1.gguf -c 256 --host 0.0.0.0 --port 8111 --threads 8 --mlock --n-gpu-layers 99
#    #command: -m /models/finnish-llama2-v0.2-Q5_1.gguf -c 128 --host 0.0.0.0 --port 8111 --threads 8 --mlock
#    volumes:
#      - "./models:/models" # Adjust this according to your actual models directory
#    ports:
#      - "8111:8111"
#    restart: always
#    deploy:
#      resources:
#        reservations:
#          devices:
#            - driver: nvidia
#              count: 1
#              capabilities: [gpu]
#        limits:
#          memory: 8G
#    memswap_limit: 8G
#    environment:
#      - NVIDIA_VISIBLE_DEVICES=all
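# Usage sketch (added; assumes the Compose v2 CLI and that the referenced GGUF
# model files already exist under ./models):
#   docker compose up -d --build
#   docker compose logs -f llama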