-
Notifications
You must be signed in to change notification settings - Fork 7.5k
[Serve] Skip sending autoscaling metric when previous one is in flight #61515
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
bfad9d1
6b05951
7862465
d6cc6ef
58d6542
1838c80
ab95f3e
278a684
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -36,8 +36,8 @@ | |
| ) | ||
| from ray.serve._private.config import DeploymentConfig | ||
| from ray.serve._private.constants import ( | ||
| RAY_SERVE_AUTOSCALING_METRIC_RECORD_INTERVAL_FACTOR, | ||
| RAY_SERVE_COLLECT_AUTOSCALING_METRICS_ON_HANDLE, | ||
| RAY_SERVE_HANDLE_AUTOSCALING_METRIC_RECORD_INTERVAL_S, | ||
| RAY_SERVE_METRICS_EXPORT_INTERVAL_MS, | ||
| RAY_SERVE_PROXY_PREFER_LOCAL_AZ_ROUTING, | ||
| SERVE_LOGGER_NAME, | ||
|
|
@@ -68,11 +68,14 @@ | |
| ) | ||
| from ray.serve._private.usage import ServeUsageTag | ||
| from ray.serve._private.utils import ( | ||
| check_obj_ref_ready_nowait, | ||
| compress_metric_report, | ||
| generate_request_id, | ||
| resolve_deployment_response, | ||
| ) | ||
| from ray.serve.config import AutoscalingConfig | ||
| from ray.serve.exceptions import BackPressureError, DeploymentUnavailableError | ||
| from ray.types import ObjectRef | ||
| from ray.util import metrics | ||
|
|
||
| logger = logging.getLogger(SERVE_LOGGER_NAME) | ||
|
|
@@ -154,6 +157,10 @@ def __init__( | |
| # Track whether the metrics manager has been shutdown | ||
| self._shutdown: bool = False | ||
|
|
||
| # Tracks in-flight metrics push to controller. Skip if new one is sent. | ||
| self._pending_metrics_push_ref: Optional[ObjectRef] = None | ||
| self._metrics_push_lock = threading.Lock() | ||
|
|
||
| # If the interval is set to 0, eagerly sets all metrics. | ||
| self._cached_metrics_enabled = RAY_SERVE_METRICS_EXPORT_INTERVAL_MS != 0 | ||
| self._cached_metrics_interval_s = RAY_SERVE_METRICS_EXPORT_INTERVAL_MS / 1000 | ||
|
|
@@ -275,13 +282,14 @@ def update_deployment_config( | |
|
|
||
| # Record number of queued + ongoing requests at regular | ||
| # intervals into the in-memory metrics store | ||
| record_interval_s = ( | ||
| autoscaling_config.look_back_period_s | ||
| * RAY_SERVE_AUTOSCALING_METRIC_RECORD_INTERVAL_FACTOR | ||
| ) | ||
| self.metrics_pusher.register_or_update_task( | ||
| self.RECORD_METRICS_TASK_NAME, | ||
| self._add_autoscaling_metrics_point, | ||
| min( | ||
| RAY_SERVE_HANDLE_AUTOSCALING_METRIC_RECORD_INTERVAL_S, | ||
| autoscaling_config.metrics_interval_s, | ||
| ), | ||
| min(record_interval_s, autoscaling_config.metrics_interval_s), | ||
| ) | ||
| # Push metrics to the controller periodically. | ||
| self.metrics_pusher.register_or_update_task( | ||
|
|
@@ -365,10 +373,17 @@ def push_autoscaling_metrics_to_controller(self): | |
| """Pushes queued and running request metrics to the controller. | ||
|
|
||
| These metrics are used by the controller for autoscaling. | ||
| If a previous push is already in flight, skips this push (will try again next interval). | ||
| """ | ||
| self._controller_handle.record_autoscaling_metrics_from_handle.remote( | ||
| self._get_metrics_report() | ||
| ) | ||
| with self._metrics_push_lock: | ||
| if self._pending_metrics_push_ref is not None: | ||
| if not check_obj_ref_ready_nowait(self._pending_metrics_push_ref): | ||
| return # Previous push still in flight, skip and try again later | ||
| self._pending_metrics_push_ref = ( | ||
| self._controller_handle.record_autoscaling_metrics_from_handle.remote( | ||
| compress_metric_report(self._get_metrics_report()) | ||
| ) | ||
| ) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Scaled-to-zero cold-start push silently throttled — Medium Severity
Additional Locations (1) |
||
|
|
||
| def _add_autoscaling_metrics_point(self): | ||
| """Adds metrics point for queued and running requests at replicas. | ||
|
|
@@ -446,11 +461,12 @@ def _get_metrics_report(self) -> HandleMetricReport: | |
| # If the running requests timeseries is empty, we set the sum | ||
| # to the current number of requests. | ||
| running_requests_sum = num_requests | ||
| avg_running_requests[replica_id] = ( | ||
| replica_str = replica_id.to_full_id_str() | ||
| avg_running_requests[replica_str] = ( | ||
| running_requests_sum / num_data_points | ||
| ) | ||
| # Get running requests data | ||
| running_requests[replica_id] = self.metrics_store.data.get( | ||
| running_requests[replica_str] = self.metrics_store.data.get( | ||
| replica_id, [TimeStampedValue(timestamp, num_requests)] | ||
| ) | ||
| handle_metric_report = HandleMetricReport( | ||
|
|
||


There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
isn't this changing the record interval from 0.5 seconds to 6 seconds? (since the default look-back period is 30s)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
it is; my argument is that I couldn't find an argument against it — it seems a safe and reasonable default for large-scale clusters.