auto no prefix needed (#404)

xzrderek · web-flow · commit ed228574ae0c · 2026-01-08T13:39:34.000-08:00
* auto no prefix needed

* update

* update test
diff --git a/eval_protocol/pytest/evaluation_test.py b/eval_protocol/pytest/evaluation_test.py
@@ -20,13 +20,12 @@
     EvaluationRow,
     EvaluationThreshold,
     EvaluationThresholdDict,
-    EvaluateResult,
     Status,
     EPParameters,
 )
 from eval_protocol.pytest.dual_mode_wrapper import create_dual_mode_wrapper
 from eval_protocol.pytest.evaluation_test_postprocess import postprocess
-from eval_protocol.pytest.execution import execute_pytest, execute_pytest_with_exception_handling
+from eval_protocol.pytest.execution import execute_pytest_with_exception_handling
 from eval_protocol.pytest.priority_scheduler import execute_priority_rollouts
 from eval_protocol.pytest.generate_parameter_combinations import (
     ParameterizedTestKwargs,
@@ -56,6 +55,7 @@
     AggregationMethod,
     add_cost_metrics,
     log_eval_status_and_rows,
+    normalize_fireworks_model,
     parse_ep_completion_params,
     parse_ep_completion_params_overwrite,
     parse_ep_max_concurrent_rollouts,
@@ -205,6 +205,7 @@ def evaluation_test(
     max_dataset_rows = parse_ep_max_rows(max_dataset_rows)
     completion_params = parse_ep_completion_params(completion_params)
     completion_params = parse_ep_completion_params_overwrite(completion_params)
+    completion_params = [normalize_fireworks_model(cp) for cp in completion_params]
     original_completion_params = completion_params
     passed_threshold = parse_ep_passed_threshold(passed_threshold)
     data_loaders = parse_ep_dataloaders(data_loaders)
@@ -365,6 +366,7 @@ def _log_eval_error(status: Status, rows: list[EvaluationRow] | None, passed: bo
                             row.input_metadata.row_id = generate_id(seed=0, index=index)
 
                     completion_params = kwargs["completion_params"] if "completion_params" in kwargs else None
+                    completion_params = normalize_fireworks_model(completion_params)
                     # Create eval metadata with test function info and current commit hash
                     eval_metadata = EvalMetadata(
                         name=test_func.__name__,
diff --git a/eval_protocol/pytest/evaluation_test_utils.py b/eval_protocol/pytest/evaluation_test_utils.py
@@ -619,3 +619,22 @@ def build_rollout_processor_config(
         server_script_path=None,
         kwargs=rollout_processor_kwargs,
     )
+
+
+def normalize_fireworks_model(completion_params: CompletionParams | None) -> CompletionParams | None:
+    """Fireworks model names like 'accounts/<org>/models/<model>' need the fireworks_ai/
+    prefix when routing through LiteLLM. This function adds the prefix if missing.
+    """
+    if completion_params is None:
+        return None
+
+    model = completion_params.get("model")
+    if (
+        model
+        and isinstance(model, str)
+        and not model.startswith("fireworks_ai/")
+        and re.match(r"^accounts/[^/]+/models/.+", model)
+    ):
+        completion_params = completion_params.copy()
+        completion_params["model"] = f"fireworks_ai/{model}"
+    return completion_params
diff --git a/tests/pytest/test_pydantic_agent.py b/tests/pytest/test_pydantic_agent.py
@@ -10,7 +10,10 @@
 
 
 def agent_factory(config: RolloutProcessorConfig) -> Agent:
-    model = OpenAIChatModel(config.completion_params["model"], provider="fireworks")
+    model_name = config.completion_params["model"]
+    if model_name.startswith("fireworks_ai/"):
+        model_name = model_name[len("fireworks_ai/") :]
+    model = OpenAIChatModel(model_name, provider="fireworks")
     return Agent(model=model)
 
 
diff --git a/tests/remote_server/test_remote_fireworks.py b/tests/remote_server/test_remote_fireworks.py
@@ -105,7 +105,7 @@ def rows() -> List[EvaluationRow]:
 
 @pytest.mark.parametrize(
     "completion_params",
-    [{"model": "fireworks_ai/accounts/fireworks/models/gpt-oss-120b", "temperature": 0.5}],
+    [{"model": "accounts/fireworks/models/gpt-oss-120b", "temperature": 0.5}],
 )
 @evaluation_test(
     data_loaders=DynamicDataLoader(

Original file line number	Diff line number	Diff line change
`@@ -105,7 +105,7 @@ def rows() -> List[EvaluationRow]:`
`105`	`105`
`106`	`106`	`@pytest.mark.parametrize(`
`107`	`107`	`"completion_params",`
`108`		`- [{"model": "fireworks_ai/accounts/fireworks/models/gpt-oss-120b", "temperature": 0.5}],`
	`108`	`+ [{"model": "accounts/fireworks/models/gpt-oss-120b", "temperature": 0.5}],`
`109`	`109`	`)`
`110`	`110`	`@evaluation_test(`
`111`	`111`	`data_loaders=DynamicDataLoader(`