Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 10 additions & 28 deletions eval_protocol/pytest/evaluation_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
EvaluationRow,
EvaluationThreshold,
EvaluationThresholdDict,
EvaluateResult,
Status,
)
from eval_protocol.pytest.dual_mode_wrapper import create_dual_mode_wrapper
Expand Down Expand Up @@ -430,19 +429,11 @@ async def _execute_pointwise_eval_with_semaphore(
experiment_id=experiment_id,
run_id=run_id,
):
try:
result = await execute_pytest(
test_func,
processed_row=row,
evaluation_test_kwargs=evaluation_test_kwargs,
)
except Exception as e:
result = row
result.evaluation_result = EvaluateResult(
score=0.0,
is_score_valid=False,
reason=f"Error during evaluation: {type(e).__name__}: {e}",
)
result = await execute_pytest(
test_func,
processed_row=row,
evaluation_test_kwargs=evaluation_test_kwargs,
)
if not isinstance(result, EvaluationRow):
raise ValueError(
f"Test function {test_func.__name__} did not return an EvaluationRow instance. You must return an EvaluationRow instance from your test function decorated with @evaluation_test."
Expand All @@ -464,20 +455,11 @@ async def _execute_groupwise_eval_with_semaphore(
run_id=run_id,
rollout_ids=group_rollout_ids or None,
):
try:
results = await execute_pytest(
test_func,
processed_dataset=rows,
evaluation_test_kwargs=evaluation_test_kwargs,
)
except Exception as e:
results = rows
for row in results:
row.evaluation_result = EvaluateResult(
score=0.0,
is_score_valid=False,
reason=f"Error during evaluation: {type(e).__name__}: {e}",
)
results = await execute_pytest(
test_func,
processed_dataset=rows,
evaluation_test_kwargs=evaluation_test_kwargs,
)
if not isinstance(results, list):
raise ValueError(
f"Test function {test_func.__name__} did not return a list of EvaluationRow instances. You must return a list of EvaluationRow instances from your test function decorated with @evaluation_test."
Expand Down
Loading