diff --git a/eval_protocol/pytest/evaluation_test.py b/eval_protocol/pytest/evaluation_test.py
index 2afb13b5..0ec07e67 100644
--- a/eval_protocol/pytest/evaluation_test.py
+++ b/eval_protocol/pytest/evaluation_test.py
@@ -19,7 +19,6 @@
     EvaluationRow,
     EvaluationThreshold,
     EvaluationThresholdDict,
-    EvaluateResult,
     Status,
 )
 from eval_protocol.pytest.dual_mode_wrapper import create_dual_mode_wrapper
@@ -430,19 +429,11 @@ async def _execute_pointwise_eval_with_semaphore(
                                     experiment_id=experiment_id,
                                     run_id=run_id,
                                 ):
-                                    try:
-                                        result = await execute_pytest(
-                                            test_func,
-                                            processed_row=row,
-                                            evaluation_test_kwargs=evaluation_test_kwargs,
-                                        )
-                                    except Exception as e:
-                                        result = row
-                                        result.evaluation_result = EvaluateResult(
-                                            score=0.0,
-                                            is_score_valid=False,
-                                            reason=f"Error during evaluation: {type(e).__name__}: {e}",
-                                        )
+                                    result = await execute_pytest(
+                                        test_func,
+                                        processed_row=row,
+                                        evaluation_test_kwargs=evaluation_test_kwargs,
+                                    )
                                 if not isinstance(result, EvaluationRow):
                                     raise ValueError(
                                         f"Test function {test_func.__name__} did not return an EvaluationRow instance. You must return an EvaluationRow instance from your test function decorated with @evaluation_test."
@@ -464,20 +455,11 @@ async def _execute_groupwise_eval_with_semaphore(
                                     run_id=run_id,
                                     rollout_ids=group_rollout_ids or None,
                                 ):
-                                    try:
-                                        results = await execute_pytest(
-                                            test_func,
-                                            processed_dataset=rows,
-                                            evaluation_test_kwargs=evaluation_test_kwargs,
-                                        )
-                                    except Exception as e:
-                                        results = rows
-                                        for row in results:
-                                            row.evaluation_result = EvaluateResult(
-                                            score=0.0,
-                                            is_score_valid=False,
-                                            reason=f"Error during evaluation: {type(e).__name__}: {e}",
-                                        )
+                                    results = await execute_pytest(
+                                        test_func,
+                                        processed_dataset=rows,
+                                        evaluation_test_kwargs=evaluation_test_kwargs,
+                                    )
                                 if not isinstance(results, list):
                                     raise ValueError(
                                         f"Test function {test_func.__name__} did not return a list of EvaluationRow instances. You must return a list of EvaluationRow instances from your test function decorated with @evaluation_test."