ModelEngine-Group
diff --git a/examples/metrics/metrics_configs.yaml b/examples/metrics/metrics_configs.yaml
Lines changed: 43 additions & 50 deletions
diff --git a/ucm/integration/vllm/ucm_connector.py b/ucm/integration/vllm/ucm_connector.py
Lines changed: 7 additions & 11 deletions
@@ -2,55 +2,48 @@
 # This file defines which metrics should be enabled and their configurations
 log_interval: 5  # Interval in seconds for logging metrics
 
-prometheus:
-  multiproc_dir: "/vllm-workspace"  # Directory for Prometheus multiprocess mode
+multiproc_dir: "/vllm-workspace"  # Directory for Prometheus multiprocess mode
 
-  metric_prefix: "ucm:" 
-  
-  # Enable/disable metrics by category
-  enabled_metrics:
-    counters: true
-    gauges: true
-    histograms: true
-  
-  # Counter metrics configuration
-  # counters:
-  #   - name: "received_requests"
-  #     documentation: "Total number of requests sent to ucm"
-  
-  # Gauge metrics configuration
-  # gauges:
-  #   - name: "lookup_hit_rate"
-  #     documentation: "Hit rate of ucm lookup requests since last log"
-  #     multiprocess_mode: "livemostrecent"
-  
-  # Histogram metrics configuration
-  histograms:
-    - name: "load_requests_num"
-      documentation: "Number of requests loaded from ucm"
-      buckets: [1, 5, 10, 20, 50, 100, 200, 500, 1000]
-    - name: "load_blocks_num"
-      documentation: "Number of blocks loaded from ucm"
-      buckets: [0, 50, 100, 150, 200, 250, 300, 350, 400, 550, 600, 750, 800, 850, 900, 950, 1000]
-    - name: "load_duration"
-      documentation: "Time to load from ucm (ms)"
-      buckets: [0, 50, 100, 150, 200, 250, 300, 350, 400, 550, 600, 750, 800, 850, 900, 950, 1000]
-    - name: "load_speed"
-      documentation: "Speed of loading from ucm (GB/s)"
-      buckets: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 50, 60, 70, 80, 90, 100]
-    - name: "save_requests_num"
-      documentation: "Number of requests saved to ucm"
-      buckets: [1, 5, 10, 20, 50, 100, 200, 500, 1000]
-    - name: "save_blocks_num"
-      documentation: "Number of blocks saved to ucm"
-      buckets: [0, 50, 100, 150, 200, 250, 300, 350, 400, 550, 600, 750, 800, 850, 900, 950, 1000]
-    - name: "save_duration"
-      documentation: "Time to save to ucm (ms)"
-      buckets: [0, 50, 100, 150, 200, 250, 300, 350, 400, 550, 600, 750, 800, 850, 900, 950, 1000]
-    - name: "save_speed"
-      documentation: "Speed of saving to ucm (GB/s)"
-      buckets: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 50, 60, 70, 80, 90, 100]
-    - name: "interval_lookup_hit_rates"
-      documentation: "Hit rates of ucm lookup requests"
-      buckets: [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
+metric_prefix: "ucm:" 
+
+# Counter metrics configuration
+# counter:
+#   - name: "received_requests"
+#     documentation: "Total number of requests sent to ucm"
+
+# Gauge metrics configuration
+# gauge:
+#   - name: "lookup_hit_rate"
+#     documentation: "Hit rate of ucm lookup requests since last log"
+#     multiprocess_mode: "livemostrecent"
+
+# Histogram metrics configuration
+histogram:
+  - name: "load_requests_num"
+    documentation: "Number of requests loaded from ucm"
+    buckets: [1, 5, 10, 20, 50, 100, 200, 500, 1000]
+  - name: "load_blocks_num"
+    documentation: "Number of blocks loaded from ucm"
+    buckets: [0, 50, 100, 150, 200, 250, 300, 350, 400, 550, 600, 750, 800, 850, 900, 950, 1000]
+  - name: "load_duration"
+    documentation: "Time to load from ucm (ms)"
+    buckets: [0, 50, 100, 150, 200, 250, 300, 350, 400, 550, 600, 750, 800, 850, 900, 950, 1000]
+  - name: "load_speed"
+    documentation: "Speed of loading from ucm (GB/s)"
+    buckets: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 50, 60, 70, 80, 90, 100]
+  - name: "save_requests_num"
+    documentation: "Number of requests saved to ucm"
+    buckets: [1, 5, 10, 20, 50, 100, 200, 500, 1000]
+  - name: "save_blocks_num"
+    documentation: "Number of blocks saved to ucm"
+    buckets: [0, 50, 100, 150, 200, 250, 300, 350, 400, 550, 600, 750, 800, 850, 900, 950, 1000]
+  - name: "save_duration"
+    documentation: "Time to save to ucm (ms)"
+    buckets: [0, 50, 100, 150, 200, 250, 300, 350, 400, 550, 600, 750, 800, 850, 900, 950, 1000]
+  - name: "save_speed"
+    documentation: "Speed of saving to ucm (GB/s)"
+    buckets: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 50, 60, 70, 80, 90, 100]
+  - name: "interval_lookup_hit_rates"
+    documentation: "Hit rates of ucm lookup requests"
+    buckets: [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
 
@@ -18,8 +18,8 @@
 from vllm.v1.core.sched.output import SchedulerOutput
 
 from ucm.logger import init_logger
-from ucm.observability import UCMStatsLogger
-from ucm.shared.metrics import ucmmonitor
+from ucm.observability import PrometheusStatsLogger
+from ucm.shared.metrics import ucmmetrics
 from ucm.store.factory import UcmConnectorFactory
 from ucm.store.ucmstore import Task, UcmKVStoreBase
 from ucm.utils import Config
@@ -172,8 +172,7 @@ def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole):
 
         self.metrics_config = self.launch_config.get("metrics_config_path", "")
         if self.metrics_config:
-            ucmmonitor.create_stats("ConnStats")
-            self.stats_logger = UCMStatsLogger(
+            self.stats_logger = PrometheusStatsLogger(
                 vllm_config.model_config.served_model_name,
                 self.global_rank,
                 self.metrics_config,
@@ -236,9 +235,8 @@ def get_num_new_matched_tokens(
             f"hit external: {external_hit_blocks}"
         )
         if self.metrics_config:
-            ucmmonitor.update_stats(
-                "ConnStats",
-                {"interval_lookup_hit_rates": external_hit_blocks / len(ucm_block_ids)},
+            ucmmetrics.update_stats(
+                "interval_lookup_hit_rates", external_hit_blocks / len(ucm_block_ids)
             )
 
         total_hit_block_num = hbm_hit_block_num + external_hit_blocks
@@ -532,8 +530,7 @@ def start_load_kv(self, forward_context: "ForwardContext", **kwargs) -> None:
             / 1024
         )  # GB/s
         if self.metrics_config and is_load:
-            ucmmonitor.update_stats(
-                "ConnStats",
+            ucmmetrics.update_stats(
                 {
                     "load_requests_num": num_loaded_request,
                     "load_blocks_num": num_loaded_block,
@@ -622,8 +619,7 @@ def wait_for_save(self) -> None:
             / 1024
         )  # GB/s
         if self.metrics_config and is_save:
-            ucmmonitor.update_stats(
-                "ConnStats",
+            ucmmetrics.update_stats(
                 {
                     "save_requests_num": num_saved_request,
                     "save_blocks_num": num_saved_block,