From 30f662f2e2d2702b6c4f1af2cab330eb27d9bb87 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Tue, 5 May 2026 17:11:16 -0700 Subject: [PATCH 1/2] fix: backfill rollout status fields from logs when polling completes The lightweight `/status` endpoint on the tracing gateway only returns the status code; `Message`, `Details`, and `Extras` still live on the Logs table. After PR #446 stopped reading from `/logs` on terminal status, the SDK was constructing `Status(code=..., message="", details=[])` for every completed rollout and `EvalProtocolError(message="")` for failures, which broke `tests/remote_server/test_remote_fireworks_propagate_status.py` (`assert row.rollout_status.message == "test error"`). Restore the two-phase polling shape from the original PR: poll `/status` for the code, and on a terminal (non-RUNNING) code do one `async_search_logs` call to backfill `message`/`details`/`extras` from the matching log row. This is still ~1000x cheaper on the Logs table than the pre-#446 polling loop because the search runs once per rollout completion instead of every poll interval. Made-with: Cursor Co-authored-by: Cursor --- .../pytest/remote_rollout_processor.py | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/eval_protocol/pytest/remote_rollout_processor.py b/eval_protocol/pytest/remote_rollout_processor.py index 632d5e00..1d62a1a0 100644 --- a/eval_protocol/pytest/remote_rollout_processor.py +++ b/eval_protocol/pytest/remote_rollout_processor.py @@ -139,8 +139,25 @@ async def _process_row(row: EvaluationRow) -> EvaluationRow: status_code, ) - status_message = status.get("message", "") or "" - status_details = status.get("details", []) or [] + # /status only returns the code; backfill message/details/extras from Logs once. + status_message: str = "" + status_details: list = [] + status_extras: dict = {} + completed_logs = await self._tracing_adapter.async_search_logs( + session, tags=[f"rollout_id:{row.execution_metadata.rollout_id}"] + ) + for log in completed_logs: + sd = log.get("status") + if sd and isinstance(sd, dict) and "code" in sd: + status_message = sd.get("message", "") or "" + status_details = sd.get("details", []) or [] + raw_extras = log.get("extras") or {} + status_extras = { + k: v + for k, v in raw_extras.items() + if k not in ("logger_name", "level", "timestamp") + } + break exception = exception_for_status_code(status_code, status_message) if exception is not None: @@ -152,8 +169,7 @@ async def _process_row(row: EvaluationRow) -> EvaluationRow: details=status_details, ) - status_extras = (status_result or {}).get("extras") - if isinstance(status_extras, dict): + if status_extras: if row.execution_metadata.extra: row.execution_metadata.extra.update(status_extras) else: From 3298857d7b2d8aaaa040e6123b554f496833b3ac Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Tue, 5 May 2026 17:23:47 -0700 Subject: [PATCH 2/2] fix: match backfilled log status to terminal status code Bugbot pointed out that the backfill loop could pick an earlier RUNNING/partial status log instead of the terminal one when a rollout emits multiple status-bearing logs. The reported `code` was always correct (it came from /status), but `message`/`details`/`extras` could be attached from the wrong row and the raised exception would carry misleading text. Match the log row's status code to the terminal code returned by /status so the backfill is deterministic. Made-with: Cursor --- eval_protocol/pytest/remote_rollout_processor.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/eval_protocol/pytest/remote_rollout_processor.py b/eval_protocol/pytest/remote_rollout_processor.py index 1d62a1a0..66e888ce 100644 --- a/eval_protocol/pytest/remote_rollout_processor.py +++ b/eval_protocol/pytest/remote_rollout_processor.py @@ -146,9 +146,12 @@ async def _process_row(row: EvaluationRow) -> EvaluationRow: completed_logs = await self._tracing_adapter.async_search_logs( session, tags=[f"rollout_id:{row.execution_metadata.rollout_id}"] ) + # Pick the log row whose status code matches the terminal + # code from /status, so intermediate RUNNING checkpoints + # don't poison the backfill. for log in completed_logs: sd = log.get("status") - if sd and isinstance(sd, dict) and "code" in sd: + if isinstance(sd, dict) and sd.get("code") == status_code: status_message = sd.get("message", "") or "" status_details = sd.get("details", []) or [] raw_extras = log.get("extras") or {}