|
2 | 2 | # This file defines which metrics are enabled and how each one is configured |
3 | 3 | log_interval: 5 # Interval in seconds for logging metrics |
4 | 4 |
|
5 | | -prometheus: |
6 | | - multiproc_dir: "/vllm-workspace" # Directory for Prometheus multiprocess mode |
| 5 | +multiproc_dir: "/vllm-workspace" # Directory for Prometheus multiprocess mode |
7 | 6 |
|
8 | | - metric_prefix: "ucm:" |
9 | | - |
10 | | - # Enable/disable metrics by category |
11 | | - enabled_metrics: |
12 | | - counters: true |
13 | | - gauges: true |
14 | | - histograms: true |
15 | | - |
16 | | - # Counter metrics configuration |
17 | | - # counters: |
18 | | - # - name: "received_requests" |
19 | | - # documentation: "Total number of requests sent to ucm" |
20 | | - |
21 | | - # Gauge metrics configuration |
22 | | - # gauges: |
23 | | - # - name: "lookup_hit_rate" |
24 | | - # documentation: "Hit rate of ucm lookup requests since last log" |
25 | | - # multiprocess_mode: "livemostrecent" |
26 | | - |
27 | | - # Histogram metrics configuration |
28 | | - histograms: |
29 | | - - name: "load_requests_num" |
30 | | - documentation: "Number of requests loaded from ucm" |
31 | | - buckets: [1, 5, 10, 20, 50, 100, 200, 500, 1000] |
32 | | - - name: "load_blocks_num" |
33 | | - documentation: "Number of blocks loaded from ucm" |
34 | | - buckets: [0, 50, 100, 150, 200, 250, 300, 350, 400, 550, 600, 750, 800, 850, 900, 950, 1000] |
35 | | - - name: "load_duration" |
36 | | - documentation: "Time to load from ucm (ms)" |
37 | | - buckets: [0, 50, 100, 150, 200, 250, 300, 350, 400, 550, 600, 750, 800, 850, 900, 950, 1000] |
38 | | - - name: "load_speed" |
39 | | - documentation: "Speed of loading from ucm (GB/s)" |
40 | | - buckets: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 50, 60, 70, 80, 90, 100] |
41 | | - - name: "save_requests_num" |
42 | | - documentation: "Number of requests saved to ucm" |
43 | | - buckets: [1, 5, 10, 20, 50, 100, 200, 500, 1000] |
44 | | - - name: "save_blocks_num" |
45 | | - documentation: "Number of blocks saved to ucm" |
46 | | - buckets: [0, 50, 100, 150, 200, 250, 300, 350, 400, 550, 600, 750, 800, 850, 900, 950, 1000] |
47 | | - - name: "save_duration" |
48 | | - documentation: "Time to save to ucm (ms)" |
49 | | - buckets: [0, 50, 100, 150, 200, 250, 300, 350, 400, 550, 600, 750, 800, 850, 900, 950, 1000] |
50 | | - - name: "save_speed" |
51 | | - documentation: "Speed of saving to ucm (GB/s)" |
52 | | - buckets: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 50, 60, 70, 80, 90, 100] |
53 | | - - name: "interval_lookup_hit_rates" |
54 | | - documentation: "Hit rates of ucm lookup requests" |
55 | | - buckets: [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0] |
| 7 | +metric_prefix: "ucm:" |
| 8 | + |
| 9 | +# Counter metrics configuration |
| 10 | +# counter: |
| 11 | +# - name: "received_requests" |
| 12 | +# documentation: "Total number of requests sent to ucm" |
| 13 | + |
| 14 | +# Gauge metrics configuration |
| 15 | +# gauge: |
| 16 | +# - name: "lookup_hit_rate" |
| 17 | +# documentation: "Hit rate of ucm lookup requests since last log" |
| 18 | +# multiprocess_mode: "livemostrecent" |
| 19 | + |
| 20 | +# Histogram metrics configuration |
| 21 | +histogram: |
| 22 | + - name: "load_requests_num" |
| 23 | + documentation: "Number of requests loaded from ucm" |
| 24 | + buckets: [1, 5, 10, 20, 50, 100, 200, 500, 1000] |
| 25 | + - name: "load_blocks_num" |
| 26 | + documentation: "Number of blocks loaded from ucm" |
| 27 | + buckets: [0, 50, 100, 150, 200, 250, 300, 350, 400, 550, 600, 750, 800, 850, 900, 950, 1000] |
| 28 | + - name: "load_duration" |
| 29 | + documentation: "Time to load from ucm (ms)" |
| 30 | + buckets: [0, 50, 100, 150, 200, 250, 300, 350, 400, 550, 600, 750, 800, 850, 900, 950, 1000] |
| 31 | + - name: "load_speed" |
| 32 | + documentation: "Speed of loading from ucm (GB/s)" |
| 33 | + buckets: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 50, 60, 70, 80, 90, 100] |
| 34 | + - name: "save_requests_num" |
| 35 | + documentation: "Number of requests saved to ucm" |
| 36 | + buckets: [1, 5, 10, 20, 50, 100, 200, 500, 1000] |
| 37 | + - name: "save_blocks_num" |
| 38 | + documentation: "Number of blocks saved to ucm" |
| 39 | + buckets: [0, 50, 100, 150, 200, 250, 300, 350, 400, 550, 600, 750, 800, 850, 900, 950, 1000] |
| 40 | + - name: "save_duration" |
| 41 | + documentation: "Time to save to ucm (ms)" |
| 42 | + buckets: [0, 50, 100, 150, 200, 250, 300, 350, 400, 550, 600, 750, 800, 850, 900, 950, 1000] |
| 43 | + - name: "save_speed" |
| 44 | + documentation: "Speed of saving to ucm (GB/s)" |
| 45 | + buckets: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 50, 60, 70, 80, 90, 100] |
| 46 | + - name: "interval_lookup_hit_rates" |
| 47 | + documentation: "Hit rates of ucm lookup requests" |
| 48 | + buckets: [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0] |
56 | 49 |
|
0 commit comments