Skip to content

Commit 4c4e028

Browse files
committed
metrics
1 parent 44de8d4 commit 4c4e028

File tree

7 files changed

+198
-324
lines changed

7 files changed

+198
-324
lines changed

examples/metrics/metrics_configs.yaml

Lines changed: 43 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -2,55 +2,48 @@
22
# This file defines which metrics are enabled and how each one is configured
33
log_interval: 5 # Interval in seconds for logging metrics
44

5-
prometheus:
6-
multiproc_dir: "/vllm-workspace" # Directory for Prometheus multiprocess mode
5+
multiproc_dir: "/vllm-workspace" # Directory for Prometheus multiprocess mode
76

8-
metric_prefix: "ucm:"
9-
10-
# Enable/disable metrics by category
11-
enabled_metrics:
12-
counters: true
13-
gauges: true
14-
histograms: true
15-
16-
# Counter metrics configuration
17-
# counters:
18-
# - name: "received_requests"
19-
# documentation: "Total number of requests sent to ucm"
20-
21-
# Gauge metrics configuration
22-
# gauges:
23-
# - name: "lookup_hit_rate"
24-
# documentation: "Hit rate of ucm lookup requests since last log"
25-
# multiprocess_mode: "livemostrecent"
26-
27-
# Histogram metrics configuration
28-
histograms:
29-
- name: "load_requests_num"
30-
documentation: "Number of requests loaded from ucm"
31-
buckets: [1, 5, 10, 20, 50, 100, 200, 500, 1000]
32-
- name: "load_blocks_num"
33-
documentation: "Number of blocks loaded from ucm"
34-
buckets: [0, 50, 100, 150, 200, 250, 300, 350, 400, 550, 600, 750, 800, 850, 900, 950, 1000]
35-
- name: "load_duration"
36-
documentation: "Time to load from ucm (ms)"
37-
buckets: [0, 50, 100, 150, 200, 250, 300, 350, 400, 550, 600, 750, 800, 850, 900, 950, 1000]
38-
- name: "load_speed"
39-
documentation: "Speed of loading from ucm (GB/s)"
40-
buckets: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 50, 60, 70, 80, 90, 100]
41-
- name: "save_requests_num"
42-
documentation: "Number of requests saved to ucm"
43-
buckets: [1, 5, 10, 20, 50, 100, 200, 500, 1000]
44-
- name: "save_blocks_num"
45-
documentation: "Number of blocks saved to ucm"
46-
buckets: [0, 50, 100, 150, 200, 250, 300, 350, 400, 550, 600, 750, 800, 850, 900, 950, 1000]
47-
- name: "save_duration"
48-
documentation: "Time to save to ucm (ms)"
49-
buckets: [0, 50, 100, 150, 200, 250, 300, 350, 400, 550, 600, 750, 800, 850, 900, 950, 1000]
50-
- name: "save_speed"
51-
documentation: "Speed of saving to ucm (GB/s)"
52-
buckets: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 50, 60, 70, 80, 90, 100]
53-
- name: "interval_lookup_hit_rates"
54-
documentation: "Hit rates of ucm lookup requests"
55-
buckets: [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
7+
metric_prefix: "ucm:"
8+
9+
# Counter metrics configuration
10+
# counter:
11+
# - name: "received_requests"
12+
# documentation: "Total number of requests sent to ucm"
13+
14+
# Gauge metrics configuration
15+
# gauge:
16+
# - name: "lookup_hit_rate"
17+
# documentation: "Hit rate of ucm lookup requests since last log"
18+
# multiprocess_mode: "livemostrecent"
19+
20+
# Histogram metrics configuration
21+
histogram:
22+
- name: "load_requests_num"
23+
documentation: "Number of requests loaded from ucm"
24+
buckets: [1, 5, 10, 20, 50, 100, 200, 500, 1000]
25+
- name: "load_blocks_num"
26+
documentation: "Number of blocks loaded from ucm"
27+
buckets: [0, 50, 100, 150, 200, 250, 300, 350, 400, 550, 600, 750, 800, 850, 900, 950, 1000]
28+
- name: "load_duration"
29+
documentation: "Time to load from ucm (ms)"
30+
buckets: [0, 50, 100, 150, 200, 250, 300, 350, 400, 550, 600, 750, 800, 850, 900, 950, 1000]
31+
- name: "load_speed"
32+
documentation: "Speed of loading from ucm (GB/s)"
33+
buckets: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 50, 60, 70, 80, 90, 100]
34+
- name: "save_requests_num"
35+
documentation: "Number of requests saved to ucm"
36+
buckets: [1, 5, 10, 20, 50, 100, 200, 500, 1000]
37+
- name: "save_blocks_num"
38+
documentation: "Number of blocks saved to ucm"
39+
buckets: [0, 50, 100, 150, 200, 250, 300, 350, 400, 550, 600, 750, 800, 850, 900, 950, 1000]
40+
- name: "save_duration"
41+
documentation: "Time to save to ucm (ms)"
42+
buckets: [0, 50, 100, 150, 200, 250, 300, 350, 400, 550, 600, 750, 800, 850, 900, 950, 1000]
43+
- name: "save_speed"
44+
documentation: "Speed of saving to ucm (GB/s)"
45+
buckets: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 50, 60, 70, 80, 90, 100]
46+
- name: "interval_lookup_hit_rates"
47+
documentation: "Hit rates of ucm lookup requests"
48+
buckets: [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
5649

ucm/integration/vllm/ucm_connector.py

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
from vllm.v1.core.sched.output import SchedulerOutput
1919

2020
from ucm.logger import init_logger
21-
from ucm.observability import UCMStatsLogger
22-
from ucm.shared.metrics import ucmmonitor
21+
from ucm.observability import PrometheusStatsLogger
22+
from ucm.shared.metrics import ucmmetrics
2323
from ucm.store.factory import UcmConnectorFactory
2424
from ucm.store.ucmstore import Task, UcmKVStoreBase
2525
from ucm.utils import Config
@@ -172,8 +172,7 @@ def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole):
172172

173173
self.metrics_config = self.launch_config.get("metrics_config_path", "")
174174
if self.metrics_config:
175-
ucmmonitor.create_stats("ConnStats")
176-
self.stats_logger = UCMStatsLogger(
175+
self.stats_logger = PrometheusStatsLogger(
177176
vllm_config.model_config.served_model_name,
178177
self.global_rank,
179178
self.metrics_config,
@@ -236,9 +235,8 @@ def get_num_new_matched_tokens(
236235
f"hit external: {external_hit_blocks}"
237236
)
238237
if self.metrics_config:
239-
ucmmonitor.update_stats(
240-
"ConnStats",
241-
{"interval_lookup_hit_rates": external_hit_blocks / len(ucm_block_ids)},
238+
ucmmetrics.update_stats(
239+
"interval_lookup_hit_rates", external_hit_blocks / len(ucm_block_ids)
242240
)
243241

244242
total_hit_block_num = hbm_hit_block_num + external_hit_blocks
@@ -532,8 +530,7 @@ def start_load_kv(self, forward_context: "ForwardContext", **kwargs) -> None:
532530
/ 1024
533531
) # GB/s
534532
if self.metrics_config and is_load:
535-
ucmmonitor.update_stats(
536-
"ConnStats",
533+
ucmmetrics.update_stats(
537534
{
538535
"load_requests_num": num_loaded_request,
539536
"load_blocks_num": num_loaded_block,
@@ -622,8 +619,7 @@ def wait_for_save(self) -> None:
622619
/ 1024
623620
) # GB/s
624621
if self.metrics_config and is_save:
625-
ucmmonitor.update_stats(
626-
"ConnStats",
622+
ucmmetrics.update_stats(
627623
{
628624
"save_requests_num": num_saved_request,
629625
"save_blocks_num": num_saved_block,

0 commit comments

Comments
 (0)