|
20 | 20 | EvaluationRow, |
21 | 21 | EvaluationThreshold, |
22 | 22 | EvaluationThresholdDict, |
23 | | - EvaluateResult, |
24 | 23 | Status, |
25 | 24 | EPParameters, |
26 | 25 | ) |
27 | 26 | from eval_protocol.pytest.dual_mode_wrapper import create_dual_mode_wrapper |
28 | 27 | from eval_protocol.pytest.evaluation_test_postprocess import postprocess |
29 | | -from eval_protocol.pytest.execution import execute_pytest, execute_pytest_with_exception_handling |
| 28 | +from eval_protocol.pytest.execution import execute_pytest_with_exception_handling |
30 | 29 | from eval_protocol.pytest.priority_scheduler import execute_priority_rollouts |
31 | 30 | from eval_protocol.pytest.generate_parameter_combinations import ( |
32 | 31 | ParameterizedTestKwargs, |
|
56 | 55 | AggregationMethod, |
57 | 56 | add_cost_metrics, |
58 | 57 | log_eval_status_and_rows, |
| 58 | + normalize_fireworks_model, |
59 | 59 | parse_ep_completion_params, |
60 | 60 | parse_ep_completion_params_overwrite, |
61 | 61 | parse_ep_max_concurrent_rollouts, |
@@ -205,6 +205,7 @@ def evaluation_test( |
205 | 205 | max_dataset_rows = parse_ep_max_rows(max_dataset_rows) |
206 | 206 | completion_params = parse_ep_completion_params(completion_params) |
207 | 207 | completion_params = parse_ep_completion_params_overwrite(completion_params) |
| 208 | + completion_params = [normalize_fireworks_model(cp) for cp in completion_params] |
208 | 209 | original_completion_params = completion_params |
209 | 210 | passed_threshold = parse_ep_passed_threshold(passed_threshold) |
210 | 211 | data_loaders = parse_ep_dataloaders(data_loaders) |
@@ -365,6 +366,7 @@ def _log_eval_error(status: Status, rows: list[EvaluationRow] | None, passed: bo |
365 | 366 | row.input_metadata.row_id = generate_id(seed=0, index=index) |
366 | 367 |
|
367 | 368 | completion_params = kwargs["completion_params"] if "completion_params" in kwargs else None |
| 369 | + completion_params = normalize_fireworks_model(completion_params) |
368 | 370 | # Create eval metadata with test function info and current commit hash |
369 | 371 | eval_metadata = EvalMetadata( |
370 | 372 | name=test_func.__name__, |
|
0 commit comments