Skip to content

Commit b322bda

Browse files
author
Dylan Huang
committed
Enhance EvaluationWatcher to update rollout_status and refactor status checking logic
1 parent 75a6514 commit b322bda

File tree

1 file changed

+20
-11
lines changed

1 file changed

+20
-11
lines changed

eval_protocol/utils/logs_server.py

Lines changed: 20 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -179,10 +179,17 @@ def _check_running_evaluations(self):
179179
for row in logs:
180180
if self._should_update_status(row):
181181
logger.info(f"Updating status to 'stopped' for row {row.input_metadata.row_id} (PID {row.pid})")
182-
if row.eval_metadata is not None:
182+
183+
# Update eval_metadata.status if it's running
184+
if row.eval_metadata and row.eval_metadata.status and row.eval_metadata.status.is_running():
183185
row.eval_metadata.status = Status.aborted(
184186
f"Evaluation aborted since process {row.pid} stopped"
185187
)
188+
189+
# Update rollout_status if it's running
190+
if row.rollout_status and row.rollout_status.is_running():
191+
row.rollout_status = Status.aborted(f"Rollout aborted since process {row.pid} stopped")
192+
186193
updated_rows.append(row)
187194

188195
# Log all updated rows
@@ -196,16 +203,18 @@ def _check_running_evaluations(self):
196203

197204
def _should_update_status(self, row: "EvaluationRow") -> bool:
198205
"""Check if a row's status should be updated to 'stopped'."""
199-
# Check if the row has running status and a PID
200-
if (
201-
row.eval_metadata
202-
and row.eval_metadata.status
203-
and row.eval_metadata.status.is_running()
204-
and row.pid is not None
205-
):
206+
# Check if any status field should be updated
207+
return self._should_update_status_field(
208+
row.eval_metadata.status if row.eval_metadata else None, row.pid
209+
) or self._should_update_status_field(row.rollout_status, row.pid)
210+
211+
def _should_update_status_field(self, status: Optional["Status"], pid: Optional[int]) -> bool:
212+
"""Check if a specific status field should be updated to 'stopped'."""
213+
# Check if the status is running and there's a PID
214+
if status and status.is_running() and pid is not None:
206215
# Check if the process is still running
207216
try:
208-
process = psutil.Process(row.pid)
217+
process = psutil.Process(pid)
209218
# Check if process is still running
210219
if not process.is_running():
211220
return True
@@ -214,10 +223,10 @@ def _should_update_status(self, row: "EvaluationRow") -> bool:
214223
return True
215224
except psutil.AccessDenied:
216225
# Can't access process info, assume it's stopped
217-
logger.warning(f"Access denied to process {row.pid}, assuming stopped")
226+
logger.warning(f"Access denied to process {pid}, assuming stopped")
218227
return True
219228
except Exception as e:
220-
logger.error(f"Error checking process {row.pid}: {e}")
229+
logger.error(f"Error checking process {pid}: {e}")
221230
# On error, assume process is still running to be safe
222231
return False
223232

0 commit comments

Comments
 (0)