Skip to content

Commit d5ea771

Browse files
committed
raise on assert
1 parent bf126c1 commit d5ea771

File tree

1 file changed

+31
-19
lines changed

1 file changed

+31
-19
lines changed

eval_protocol/pytest/evaluation_test.py

Lines changed: 31 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -436,17 +436,23 @@ async def _execute_pointwise_eval_with_semaphore(
436436
processed_row=row,
437437
evaluation_test_kwargs=evaluation_test_kwargs,
438438
)
439+
except AssertionError:
440+
raise
439441
except Exception as e:
440-
result = row
441-
result.evaluation_result = EvaluateResult(
442-
score=0.0,
443-
is_score_valid=False,
444-
reason=f"Error during evaluation: {type(e).__name__}: {e}",
445-
)
446-
if result.eval_metadata is not None:
447-
result.eval_metadata.status = Status.error(
448-
f"Error during evaluation: {type(e).__name__}: {e}",
442+
# Default: capture non-assert exceptions unless explicitly disabled
443+
if os.getenv("EP_CAPTURE_EVAL_EXCEPTIONS", "1").strip() == "1":
444+
result = row
445+
result.evaluation_result = EvaluateResult(
446+
score=0.0,
447+
is_score_valid=False,
448+
reason=f"Error during evaluation: {type(e).__name__}: {e}",
449449
)
450+
if result.eval_metadata is not None:
451+
result.eval_metadata.status = Status.error(
452+
f"Error during evaluation: {type(e).__name__}: {e}",
453+
)
454+
else:
455+
raise
450456
if not isinstance(result, EvaluationRow):
451457
raise ValueError(
452458
f"Test function {test_func.__name__} did not return an EvaluationRow instance. You must return an EvaluationRow instance from your test function decorated with @evaluation_test."
@@ -474,18 +480,24 @@ async def _execute_groupwise_eval_with_semaphore(
474480
processed_dataset=rows,
475481
evaluation_test_kwargs=evaluation_test_kwargs,
476482
)
483+
except AssertionError:
484+
raise
477485
except Exception as e:
478-
results = rows
479-
for row in results:
480-
row.evaluation_result = EvaluateResult(
481-
score=0.0,
482-
is_score_valid=False,
483-
reason=f"Error during evaluation: {type(e).__name__}: {e}",
484-
)
485-
if row.eval_metadata is not None:
486-
row.eval_metadata.status = Status.error(
487-
f"Error during evaluation: {type(e).__name__}: {e}",
486+
# Default: capture non-assert exceptions unless explicitly disabled
487+
if os.getenv("EP_CAPTURE_EVAL_EXCEPTIONS", "1").strip() == "1":
488+
results = rows
489+
for row in results:
490+
row.evaluation_result = EvaluateResult(
491+
score=0.0,
492+
is_score_valid=False,
493+
reason=f"Error during evaluation: {type(e).__name__}: {e}",
488494
)
495+
if row.eval_metadata is not None:
496+
row.eval_metadata.status = Status.error(
497+
f"Error during evaluation: {type(e).__name__}: {e}",
498+
)
499+
else:
500+
raise
489501
if not isinstance(results, list):
490502
raise ValueError(
491503
f"Test function {test_func.__name__} did not return a list of EvaluationRow instances. You must return a list of EvaluationRow instances from your test function decorated with @evaluation_test."

0 commit comments

Comments
 (0)