From f5011a4bdbfe1f6df0e7557a19ff545263981f38 Mon Sep 17 00:00:00 2001 From: supermario_leo Date: Thu, 9 Apr 2026 09:44:13 +0800 Subject: [PATCH] [Bugfix] Fix PrometheusStatsLogger crash on re-instantiation and make the update thread a daemon Two bugs in observability.py: 1. When _metric_mappings is already populated (e.g. multi-engine or hot-reload), __init__ returns early without initializing instance attributes (is_running, thread, config, etc.). Any subsequent call to shutdown() or __del__() raises AttributeError. Fix: always initialize core instance attributes before the early-return guard, and guard shutdown() against thread being None (the early-return path never starts a thread). 2. The update_stats_loop thread is not created as a daemon thread. If shutdown() is never explicitly called, this thread prevents the Python process from exiting cleanly. Fix: set daemon=True. Signed-off-by: supermario_leo --- ucm/observability.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/ucm/observability.py b/ucm/observability.py index 6bc36099e..b7409dfed 100644 --- a/ucm/observability.py +++ b/ucm/observability.py @@ -66,12 +66,16 @@ def __init__(self, model_name, worker_id, config_path): Load metrics config from YAML file (config_path), register metrics using prometheus_client, and start a thread to get updated metrics. """ + # Always initialize instance attributes so that shutdown() and + # __del__() work even when we skip metric registration below. 
+ self.config = self._load_config(config_path) + self.log_interval = self.config.get("log_interval", 10) + self.is_running = False + self.thread = None + if _metric_mappings: logger.warning("Metrics are already registered, skipping re-registration.") return - # Load metrics config - self.config = self._load_config(config_path) - self.log_interval = self.config.get("log_interval", 10) # Set up histogram max length histogram_max_length = self.config.get("histogram_max_length", 10000) @@ -97,9 +101,10 @@ def __init__(self, model_name, worker_id, config_path): # Initialize metrics based on config self._init_metrics_from_config() - # Start thread to update metrics + # Start daemon thread to update metrics so it won't block + # process exit if shutdown() is not explicitly called. self.is_running = True - self.thread = threading.Thread(target=self.update_stats_loop) + self.thread = threading.Thread(target=self.update_stats_loop, daemon=True) self.thread.start() def _register_metrics_by_type(self, metric_type): @@ -190,7 +195,8 @@ def update_stats_loop(self): def shutdown(self): self.is_running = False - self.thread.join() + if self.thread is not None: + self.thread.join() def __del__(self): try: