babysit-pr: ignore failed jobs that belong to already-completed runs

Summary of the change carried by this patch:

* gh_pr_watch.py: adds has_active_failed_job(failed_jobs), which is true
  when at least one entry's "run_status" (compared case-insensitively) is
  anything other than "completed"; a missing or empty status therefore
  counts as not completed.
* recommend_actions(): has_failed_pr_checks now uses
  has_active_failed_job(failed_jobs) in place of bool(failed_jobs), so a
  failed job whose run_status is "completed" no longer counts as a failing
  check by itself when checks_summary["failed_count"] is 0.
* test_gh_pr_watch.py: adds two tests. A failed job with run_status
  "completed" is expected to yield ["ready_to_merge"]; a failed job with
  run_status "in_progress" while a check is still pending is expected to
  yield ["diagnose_ci_failure"].

NOTE(review): line breaks and 4-space indentation were reconstructed from a
whitespace-collapsed copy of this patch; the per-hunk line counts match the
@@ headers, but confirm the indentation against the target files.

diff --git a/.codex/skills/babysit-pr/scripts/gh_pr_watch.py b/.codex/skills/babysit-pr/scripts/gh_pr_watch.py
index face4e6981af..a250404824db 100755
--- a/.codex/skills/babysit-pr/scripts/gh_pr_watch.py
+++ b/.codex/skills/babysit-pr/scripts/gh_pr_watch.py
@@ -610,6 +610,10 @@ def unique_actions(actions):
     return out
 
 
+def has_active_failed_job(failed_jobs):
+    return any(str(job.get("run_status") or "").lower() != "completed" for job in failed_jobs)
+
+
 def is_pr_ready_to_merge(pr, checks_summary, new_review_items):
     if pr["closed"] or pr["merged"]:
         return False
@@ -643,7 +647,7 @@ def recommend_actions(pr, checks_summary, failed_runs, failed_jobs, new_review_i
     if new_review_items:
         actions.append("process_review_comment")
 
-    has_failed_pr_checks = checks_summary["failed_count"] > 0 or bool(failed_jobs)
+    has_failed_pr_checks = checks_summary["failed_count"] > 0 or has_active_failed_job(failed_jobs)
     if has_failed_pr_checks:
         if checks_summary["all_terminal"] and retries_used >= max_retries:
             actions.append("stop_exhausted_retries")
diff --git a/.codex/skills/babysit-pr/scripts/test_gh_pr_watch.py b/.codex/skills/babysit-pr/scripts/test_gh_pr_watch.py
index b636ee4c5573..ebbeab119c5a 100644
--- a/.codex/skills/babysit-pr/scripts/test_gh_pr_watch.py
+++ b/.codex/skills/babysit-pr/scripts/test_gh_pr_watch.py
@@ -118,6 +118,50 @@ def test_recommend_actions_prioritizes_review_comments():
     ]
 
 
+def test_recommend_actions_ignores_stale_failed_jobs_from_completed_runs():
+    actions = gh_pr_watch.recommend_actions(
+        sample_pr(),
+        sample_checks(),
+        [],
+        [
+            {
+                "run_id": 99,
+                "run_status": "completed",
+                "run_conclusion": "failure",
+                "job_name": "unit tests",
+                "conclusion": "failure",
+            }
+        ],
+        [],
+        3,
+        3,
+    )
+
+    assert actions == ["ready_to_merge"]
+
+
+def test_recommend_actions_diagnoses_failed_jobs_from_active_runs():
+    actions = gh_pr_watch.recommend_actions(
+        sample_pr(),
+        sample_checks(all_terminal=False, pending_count=1),
+        [],
+        [
+            {
+                "run_id": 99,
+                "run_status": "in_progress",
+                "run_conclusion": "",
+                "job_name": "unit tests",
+                "conclusion": "failure",
+            }
+        ],
+        [],
+        0,
+        3,
+    )
+
+    assert actions == ["diagnose_ci_failure"]
+
+
 def test_run_watch_keeps_polling_open_ready_to_merge_pr(monkeypatch):
     sleeps = []
     events = []