-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy pathconfig.example.toml
More file actions
133 lines (127 loc) · 7.69 KB
/
config.example.toml
File metadata and controls
133 lines (127 loc) · 7.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
[atoma_service]
# Map of model name -> list of chat-completions endpoints serving that model.
# Each endpoint is a [ url, job_name, limit ] triple; the job names are the same ones listed under [atoma_p2p.metrics_endpoints].
# NOTE(review): the third element (256) is presumably a per-endpoint concurrent-request limit - confirm against the service's config schema.
chat_completions_service_urls = { "Infermatic/Llama-3.3-70B-Instruct-FP8-Dynamic" = [
    [ "http://chat-completions1:8000", "vllm1", 256 ],
    [ "http://chat-completions2:8000", "vllm2", 256 ],
    [ "http://chat-completions3:8000", "vllm3", 256 ],
    [ "http://chat-completions4:8000", "vllm4", 256 ],
    [ "http://chat-completions5:8000", "vllm5", 256 ],
    [ "http://chat-completions6:8000", "vllm6", 256 ],
    [ "http://chat-completions7:8000", "vllm7", 256 ],
    [ "http://chat-completions8:8000", "vllm8", 256 ],
] }
embeddings_service_url = "http://embeddings:80"
image_generations_service_url = "http://image-generations:80"
# "development" or "production" (for use in Sentry, you need to set the Sentry DSN, see sentry_dsn below)
environment = "development"
heartbeat_url = "my-heartbeat-url"
# Maximum number of requests that can be sent to the service in a given interval (see limit_request_interval_ms);
# if the number of requests exceeds this value, the service will not accept new requests
limit_number_of_requests_per_interval = 3
# Interval in milliseconds for limiting the number of requests
limit_request_interval_ms = 1000
# Maximum number of queued requests for each inference service; if the number of queued requests exceeds this value,
# the service will be considered overloaded and will not accept new requests
max_num_queued_requests = 1
# Lower threshold for memory usage; if the memory usage goes below this value, the service will not be considered overloaded
memory_lower_threshold = 0.75
# Upper threshold for memory usage; if the memory usage goes above this value, the service will be considered overloaded
memory_upper_threshold = 0.9
metrics_update_interval = 30
# List of models to be used by the service; the current value here is just a placeholder, please change it to the models you want to deploy
models = [ "Infermatic/Llama-3.3-70B-Instruct-FP8-Dynamic" ]
# URL of the proxy service, used for confidential computing (sending attestations)
proxy_service_url = "https://api.atoma.network"
# Batching interval in milliseconds; requests are batched over this interval before being sent to the inference service
requests_batcher_interval_ms = 300
revisions = [ "main" ]
# Sentry DSN (needs to be set for use in Sentry)
sentry_dsn = ""
service_bind_address = "0.0.0.0:3000"
# Timeout for the "too many requests" flag, in milliseconds
too_many_requests_timeout_ms = 2000
# Sui addresses that are allowed to use fiat payments
whitelist_sui_addresses_for_fiat = [ ]
[atoma_sui]
# Current ATOMA DB object ID for testnet
atoma_db = "0x02920289f426dd1f3c2572d613f7dc92be95041720864a73d44d65585530efc5"
# Current ATOMA package ID for testnet
atoma_package_id = "0x8903298ba49a8e83d438e014b2cfd18404324f3a0274b9507b520d5745b85208"
# Path to the Sui events cursor file
cursor_path = "/app/data/cursor.toml"
# Current RPC node address for testnet
http_rpc_node_addr = "https://fullnode.testnet.sui.io:443"
# Some reference value
limit = 100
# Some reference value
max_concurrent_requests = 10
# List of node IDs under control of the node wallet
node_small_ids = [ 1 ]
# Some reference value
request_timeout = { secs = 300, nanos = 0 }
# Path to the Sui client configuration file, as accessed from the docker container
# (if this is not the case, pass in the full path on your host machine, which is by default ~/.sui/sui_config/client.yaml)
sui_config_path = "/root/.sui/sui_config/client.yaml"
# Path to the Sui keystore file, as accessed from the docker container
# (if this is not the case, pass in the full path on your host machine, which is by default ~/.sui/sui_config/sui.keystore)
sui_keystore_path = "/root/.sui/sui_config/sui.keystore"
# Current USDC package ID for testnet
usdc_package_id = "0xa1ec7fc00a6f40db9693ad1415d0c193ad3906494428cf252621037bd7117e29"
[atoma_state]
# Connection URL of the Postgres database.
# NOTE(review): this comment previously read "Path inside the container", but the value is a URL, not a path -
# presumably the URL is meant to be resolved from inside the container; confirm.
# Replace the <POSTGRES_USER>, <POSTGRES_PASSWORD> and <POSTGRES_DB> placeholders with the ones for your local environment (in the .env file)
database_url = "postgres://<POSTGRES_USER>:<POSTGRES_PASSWORD>@postgres-db:5432/<POSTGRES_DB>"
[atoma_daemon]
# WARN: Do not expose this port to the public internet; it is used only for internal communication between the Atoma Node and the Atoma Network
service_bind_address = "0.0.0.0:3001"
# List of node badges, one [ badge_id, small_id ] pair per entry; both values are assigned once the node registers itself.
# Replace the placeholder values with the actual node badge and small ID assigned by Atoma's smart contract upon node registration.
node_badges = [
    [ "0x268e6af9502dcdcaf514bb699c880b37fa1e8d339293bc4f331f2dde54180600", 1 ],
]
[atoma_p2p]
# Interval for sending heartbeat messages to peers (in seconds)
heartbeat_interval = { secs = 30, nanos = 0 }
# Maximum duration a connection can remain idle before closing (in seconds)
idle_connection_timeout = { secs = 60, nanos = 0 }
# Addresses to listen on for incoming connections
# (TCP format: "/ip4/x.x.x.x/tcp/x", QUIC format: "/ip4/x.x.x.x/udp/x/quic-v1")
listen_addrs = [ "/ip4/0.0.0.0/tcp/4001", "/ip4/0.0.0.0/udp/4001/quic-v1" ]
# The list of bootstrap nodes to dial
bootstrap_node_addrs = [ "/ip4/213.130.147.75/tcp/8083", "/ip4/213.130.147.75/udp/8083/quic-v1" ]
# The list of bootstrap node peer IDs to connect to, the latter is the peer ID of the dev bootstrap node
bootstrap_node_peer_ids = [
    "12D3KooWBLv3tmR3PY9gTSfp1yYNL3ST2v3ZpmHxgASjnycUurmC",
    "12D3KooWHXsXfELpyB91QUXebbLMLSQDB3kcGyogn4pogABSj1eZ",
]
# Node's small ID (assigned by Atoma smart contract)
node_small_id = 1
# The HTTP(s) public URL of the node
public_url = "https://<PUBLIC_URL>"
# Required: replace this with the country of the node, as an ISO 3166-1 alpha-2 code (https://www.iso.org/obp/ui/#search/code/)
country = ""
# The path to the local key.
# Keep this key above the [atoma_p2p.metrics_endpoints] header: any key placed after that header
# is parsed as an entry of the metrics_endpoints map instead of as a key of [atoma_p2p].
local_key = "/app/data/local_key"
# List of endpoints serving metrics to collect, in the form of a map of model name to a tuple of (serving_engine, job_names)
# (e.g. `"meta-llama/Llama-3.2-3B-Instruct" = ["vllm", ["vllm1", "vllm2", "vllm3", "vllm4"]]`)
[atoma_p2p.metrics_endpoints]
"Infermatic/Llama-3.3-70B-Instruct-FP8-Dynamic" = [
    "vllm",
    [
        "vllm1",
        "vllm2",
        "vllm3",
        "vllm4",
        "vllm5",
        "vllm6",
        "vllm7",
        "vllm8",
    ],
]