From 30f662f2e2d2702b6c4f1af2cab330eb27d9bb87 Mon Sep 17 00:00:00 2001
From: Sandeep Singh <sandeep01@hotmail.sg>
Date: Tue, 5 May 2026 17:11:16 -0700
Subject: [PATCH 1/2] fix: backfill rollout status fields from logs when
 polling completes

The lightweight `/status` endpoint on the tracing gateway only returns the
status code; `Message`, `Details`, and `Extras` still live on the Logs
table. After PR #446 stopped reading from `/logs` on terminal status, the
SDK was constructing `Status(code=..., message="", details=[])` for every
completed rollout and `EvalProtocolError(message="")` for failures, which
broke `tests/remote_server/test_remote_fireworks_propagate_status.py`
(`assert row.rollout_status.message == "test error"`).

Restore the two-phase polling shape from the original PR: poll `/status`
for the code, and on a terminal (non-RUNNING) code make one
`async_search_logs` call to backfill `message`/`details`/`extras` from
the matching log row. This is still ~1000x cheaper on the Logs table than
the pre-#446 polling loop because the search runs once per rollout
completion instead of once per poll interval.

Made-with: Cursor
Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .../pytest/remote_rollout_processor.py        | 24 +++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/eval_protocol/pytest/remote_rollout_processor.py b/eval_protocol/pytest/remote_rollout_processor.py
index 632d5e00..1d62a1a0 100644
--- a/eval_protocol/pytest/remote_rollout_processor.py
+++ b/eval_protocol/pytest/remote_rollout_processor.py
@@ -139,8 +139,25 @@ async def _process_row(row: EvaluationRow) -> EvaluationRow:
                         status_code,
                     )
 
-                    status_message = status.get("message", "") or ""
-                    status_details = status.get("details", []) or []
+                    # /status only returns the code; backfill message/details/extras from Logs once.
+                    status_message: str = ""
+                    status_details: list = []
+                    status_extras: dict = {}
+                    completed_logs = await self._tracing_adapter.async_search_logs(
+                        session, tags=[f"rollout_id:{row.execution_metadata.rollout_id}"]
+                    )
+                    for log in completed_logs:
+                        sd = log.get("status")
+                        if sd and isinstance(sd, dict) and "code" in sd:
+                            status_message = sd.get("message", "") or ""
+                            status_details = sd.get("details", []) or []
+                            raw_extras = log.get("extras") or {}
+                            status_extras = {
+                                k: v
+                                for k, v in raw_extras.items()
+                                if k not in ("logger_name", "level", "timestamp")
+                            }
+                            break
 
                     exception = exception_for_status_code(status_code, status_message)
                     if exception is not None:
@@ -152,8 +169,7 @@ async def _process_row(row: EvaluationRow) -> EvaluationRow:
                         details=status_details,
                     )
 
-                    status_extras = (status_result or {}).get("extras")
-                    if isinstance(status_extras, dict):
+                    if status_extras:
                         if row.execution_metadata.extra:
                             row.execution_metadata.extra.update(status_extras)
                         else:

From 3298857d7b2d8aaaa040e6123b554f496833b3ac Mon Sep 17 00:00:00 2001
From: Sandeep Singh <sandeep01@hotmail.sg>
Date: Tue, 5 May 2026 17:23:47 -0700
Subject: [PATCH 2/2] fix: match backfilled log status to terminal status code

Bugbot pointed out that the backfill loop could pick an earlier
RUNNING/partial status log instead of the terminal one when a rollout
emits multiple status-bearing logs. The reported `code` was always
correct (it came from `/status`), but `message`/`details`/`extras` could
be taken from the wrong row, and the raised exception would then carry
misleading text.

Match the log row's status code to the terminal code returned by
`/status` so the backfill is deterministic. If no log row carries that
code, `message`/`details`/`extras` keep their empty defaults.

Made-with: Cursor
---
 eval_protocol/pytest/remote_rollout_processor.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/eval_protocol/pytest/remote_rollout_processor.py b/eval_protocol/pytest/remote_rollout_processor.py
index 1d62a1a0..66e888ce 100644
--- a/eval_protocol/pytest/remote_rollout_processor.py
+++ b/eval_protocol/pytest/remote_rollout_processor.py
@@ -146,9 +146,12 @@ async def _process_row(row: EvaluationRow) -> EvaluationRow:
                     completed_logs = await self._tracing_adapter.async_search_logs(
                         session, tags=[f"rollout_id:{row.execution_metadata.rollout_id}"]
                     )
+                    # Pick the log row whose status code matches the terminal
+                    # code from /status, so intermediate RUNNING checkpoints
+                    # don't poison the backfill.
                     for log in completed_logs:
                         sd = log.get("status")
-                        if sd and isinstance(sd, dict) and "code" in sd:
+                        if isinstance(sd, dict) and sd.get("code") == status_code:
                             status_message = sd.get("message", "") or ""
                             status_details = sd.get("details", []) or []
                             raw_extras = log.get("extras") or {}