Add try/except around execute_pytest calls in pointwise and groupwise evaluation

mayinghan · mayinghan · commit a301862e1b9e · 2025-10-28T21:40:01.000-07:00
diff --git a/eval_protocol/pytest/evaluation_test.py b/eval_protocol/pytest/evaluation_test.py
@@ -19,6 +19,7 @@
     EvaluationRow,
     EvaluationThreshold,
     EvaluationThresholdDict,
+    EvaluateResult,
     Status,
 )
 from eval_protocol.pytest.dual_mode_wrapper import create_dual_mode_wrapper
@@ -429,11 +430,19 @@ async def _execute_pointwise_eval_with_semaphore(
                                     experiment_id=experiment_id,
                                     run_id=run_id,
                                 ):
-                                    result = await execute_pytest(
-                                        test_func,
-                                        processed_row=row,
-                                        evaluation_test_kwargs=evaluation_test_kwargs,
-                                    )
+                                    try:
+                                        result = await execute_pytest(
+                                            test_func,
+                                            processed_row=row,
+                                            evaluation_test_kwargs=evaluation_test_kwargs,
+                                        )
+                                    except Exception as e:
+                                        result = row
+                                        result.evaluation_result = EvaluateResult(
+                                            score=0.0,
+                                            is_score_valid=False,
+                                            reason=f"Error during evaluation: {type(e).__name__}: {e}",
+                                        )
                                 if not isinstance(result, EvaluationRow):
                                     raise ValueError(
                                         f"Test function {test_func.__name__} did not return an EvaluationRow instance. You must return an EvaluationRow instance from your test function decorated with @evaluation_test."
@@ -455,11 +464,20 @@ async def _execute_groupwise_eval_with_semaphore(
                                     run_id=run_id,
                                     rollout_ids=group_rollout_ids or None,
                                 ):
-                                    results = await execute_pytest(
-                                        test_func,
-                                        processed_dataset=rows,
-                                        evaluation_test_kwargs=evaluation_test_kwargs,
-                                    )
+                                    try:
+                                        results = await execute_pytest(
+                                            test_func,
+                                            processed_dataset=rows,
+                                            evaluation_test_kwargs=evaluation_test_kwargs,
+                                        )
+                                    except Exception as e:
+                                        results = rows
+                                        for row in results:
+                                            row.evaluation_result = EvaluateResult(
+                                            score=0.0,
+                                            is_score_valid=False,
+                                            reason=f"Error during evaluation: {type(e).__name__}: {e}",
+                                        )
                                 if not isinstance(results, list):
                                     raise ValueError(
                                         f"Test function {test_func.__name__} did not return a list of EvaluationRow instances. You must return a list of EvaluationRow instances from your test function decorated with @evaluation_test."