diff --git a/ucm/observability.py b/ucm/observability.py index 6bc36099e..b7409dfed 100644 --- a/ucm/observability.py +++ b/ucm/observability.py @@ -66,12 +66,16 @@ def __init__(self, model_name, worker_id, config_path): Load metrics config from YAML file (config_path), register metrics using prometheus_client, and start a thread to get updated metrics. """ + # Always initialize instance attributes so that shutdown() and + # __del__() work even when we skip metric registration below. + self.config = self._load_config(config_path) + self.log_interval = self.config.get("log_interval", 10) + self.is_running = False + self.thread = None + if _metric_mappings: logger.warning("Metrics are already registered, skipping re-registration.") return - # Load metrics config - self.config = self._load_config(config_path) - self.log_interval = self.config.get("log_interval", 10) # Set up histogram max length histogram_max_length = self.config.get("histogram_max_length", 10000) @@ -97,9 +101,10 @@ def __init__(self, model_name, worker_id, config_path): # Initialize metrics based on config self._init_metrics_from_config() - # Start thread to update metrics + # Start daemon thread to update metrics so it won't block + # process exit if shutdown() is not explicitly called. self.is_running = True - self.thread = threading.Thread(target=self.update_stats_loop) + self.thread = threading.Thread(target=self.update_stats_loop, daemon=True) self.thread.start() def _register_metrics_by_type(self, metric_type): @@ -190,7 +195,8 @@ def update_stats_loop(self): def shutdown(self): self.is_running = False - self.thread.join() + if self.thread is not None: + self.thread.join() def __del__(self): try: