Skip to content

Commit a301862

Browse files
committed
add try catch
1 parent 9f352ed commit a301862

File tree

1 file changed

+28
-10
lines changed

1 file changed

+28
-10
lines changed

eval_protocol/pytest/evaluation_test.py

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
 19  19   EvaluationRow,
 20  20   EvaluationThreshold,
 21  21   EvaluationThresholdDict,
     22 + EvaluateResult,
 22  23   Status,
 23  24   )
 24  25   from eval_protocol.pytest.dual_mode_wrapper import create_dual_mode_wrapper
@@ -429,11 +430,19 @@ async def _execute_pointwise_eval_with_semaphore(
429 430   experiment_id=experiment_id,
430 431   run_id=run_id,
431 432   ):
432     - result = await execute_pytest(
433     - test_func,
434     - processed_row=row,
435     - evaluation_test_kwargs=evaluation_test_kwargs,
436     - )
    433 + try:
    434 + result = await execute_pytest(
    435 + test_func,
    436 + processed_row=row,
    437 + evaluation_test_kwargs=evaluation_test_kwargs,
    438 + )
    439 + except Exception as e:
    440 + result = row
    441 + result.evaluation_result = EvaluateResult(
    442 + score=0.0,
    443 + is_score_valid=False,
    444 + reason=f"Error during evaluation: {type(e).__name__}: {e}",
    445 + )
437 446   if not isinstance(result, EvaluationRow):
438 447   raise ValueError(
439 448   f"Test function {test_func.__name__} did not return an EvaluationRow instance. You must return an EvaluationRow instance from your test function decorated with @evaluation_test."
@@ -455,11 +464,20 @@ async def _execute_groupwise_eval_with_semaphore(
455 464   run_id=run_id,
456 465   rollout_ids=group_rollout_ids or None,
457 466   ):
458     - results = await execute_pytest(
459     - test_func,
460     - processed_dataset=rows,
461     - evaluation_test_kwargs=evaluation_test_kwargs,
462     - )
    467 + try:
    468 + results = await execute_pytest(
    469 + test_func,
    470 + processed_dataset=rows,
    471 + evaluation_test_kwargs=evaluation_test_kwargs,
    472 + )
    473 + except Exception as e:
    474 + results = rows
    475 + for row in results:
    476 + row.evaluation_result = EvaluateResult(
    477 + score=0.0,
    478 + is_score_valid=False,
    479 + reason=f"Error during evaluation: {type(e).__name__}: {e}",
    480 + )
463 481   if not isinstance(results, list):
464 482   raise ValueError(
465 483   f"Test function {test_func.__name__} did not return a list of EvaluationRow instances. You must return a list of EvaluationRow instances from your test function decorated with @evaluation_test."

0 commit comments

Comments
 (0)