1111from eval_protocol .models import EvaluationRow
1212from eval_protocol .pytest .rollout_processor import RolloutProcessor
1313from eval_protocol .pytest .types import RolloutProcessorConfig
14+ from eval_protocol .pytest .utils import normalize_fireworks_model_for_litellm
1415
1516from eval_protocol .pytest .default_agent_rollout_processor import Agent
1617from klavis import Klavis
@@ -30,15 +31,15 @@ def __init__(
3031 self .server_name = server_name
3132 self .initialize_data_factory = initialize_data_factory
3233 self .klavis_client = Klavis (api_key = os .environ .get ("KLAVIS_API_KEY" ))
33-
34+
def _init_sandbox(self) -> CreateSandboxResponse:
    """Create a Klavis sandbox for the server named by ``self.server_name``.

    The server name is converted to a ``SandboxMcpServer`` value and handed
    to the Klavis client's ``sandbox.create_sandbox`` call.

    Returns:
        The response object returned by ``create_sandbox``.

    Raises:
        Exception: Any error raised while converting the server name or
            creating the sandbox is logged with its traceback and then
            re-raised unchanged.
    """
    try:
        # The conversion stays inside the try so a bad server name is
        # logged the same way as a failed API call.
        target_server = SandboxMcpServer(self.server_name)
        response = self.klavis_client.sandbox.create_sandbox(server_name=target_server)
    except Exception as exc:
        logger.error(f"Error creating sandbox: {str(exc)}", exc_info=True)
        raise
    return response
41-
42+
4243 @staticmethod
4344 def create_mcp_config (server_url : str , server_key : str = "main" , auth_token : str | None = None ) -> str :
4445 """Create a temporary MCP config file and return its path."""
@@ -47,26 +48,24 @@ def create_mcp_config(server_url: str, server_key: str = "main", auth_token: str
4748 server_key : {
4849 "url" : server_url ,
4950 "transport" : "streamable_http" ,
50- ** ({"authorization" : f"Bearer { auth_token } " } if auth_token else {})
51+ ** ({"authorization" : f"Bearer { auth_token } " } if auth_token else {}),
5152 }
5253 }
5354 }
54-
55+
5556 # Create a temp file that persists for the session
5657 fd , path = tempfile .mkstemp (suffix = ".json" , prefix = "mcp_config_" )
57- with os .fdopen (fd , 'w' ) as f :
58+ with os .fdopen (fd , "w" ) as f :
5859 json .dump (config , f )
5960 return path
6061
61- def __call__ (
62- self , rows : List [EvaluationRow ], config : RolloutProcessorConfig
63- ) -> List [asyncio .Task [EvaluationRow ]]:
62+ def __call__ (self , rows : List [EvaluationRow ], config : RolloutProcessorConfig ) -> List [asyncio .Task [EvaluationRow ]]:
6463 """Process evaluation rows with Klavis sandbox lifecycle management"""
6564 semaphore = config .semaphore
6665
6766 async def process_row (row : EvaluationRow ) -> EvaluationRow :
6867 """Process a single row with complete sandbox lifecycle"""
69-
68+
7069 start_time = time .perf_counter ()
7170 agent : Agent | None = None
7271 temp_config_path : str | None = None
@@ -88,25 +87,32 @@ async def process_row(row: EvaluationRow) -> EvaluationRow:
8887 if row .input_metadata is not None
8988 else None
9089 )
91-
90+
9291 if init_data :
93- logger .info (f"Initializing { self .server_name } sandbox { sandbox .sandbox_id } " )
92+ logger .info (f"Initializing { self .server_name } sandbox { sandbox .sandbox_id } " ) # pyright: ignore[reportOptionalMemberAccess]
9493 initialize_method = getattr (
95- self .klavis_client .sandbox , f"initialize_{ sandbox .server_name .value } _sandbox"
94+ self .klavis_client .sandbox ,
95+ f"initialize_{ sandbox .server_name .value } _sandbox" , # pyright: ignore[reportOptionalMemberAccess]
9696 )
97- init_response = initialize_method (sandbox_id = sandbox .sandbox_id , ** init_data )
97+ init_response = initialize_method (sandbox_id = sandbox .sandbox_id , ** init_data ) # pyright: ignore[reportOptionalMemberAccess]
9898 logger .info (f"Initialization response: { init_response } " )
99-
99+
100100 # Step 2: Create temporary MCP config with sandbox URL
101101 temp_config_path = self .create_mcp_config (
102- server_url = sandbox .server_url , server_key = sandbox .server_name .value
102+ server_url = sandbox .server_url , # pyright: ignore[reportOptionalMemberAccess]
103+ server_key = sandbox .server_name .value , # pyright: ignore[reportOptionalMemberAccess]
103104 )
104105 logger .info (f"MCP config created: { temp_config_path } " )
105106
106107 # Step 3: Run agent with sandbox MCP server
107- logger .info (f"Running agent for row { row .execution_metadata .rollout_id } with { self .server_name } sandbox" )
108+ logger .info (
109+ f"Running agent for row { row .execution_metadata .rollout_id } with { self .server_name } sandbox"
110+ )
111+ # Normalize Fireworks model names for LiteLLM routing
112+ completion_params = normalize_fireworks_model_for_litellm (row .input_metadata .completion_params ) or {}
113+ row .input_metadata .completion_params = completion_params
108114 agent = Agent (
109- model = row . input_metadata . completion_params ["model" ],
115+ model = completion_params ["model" ],
110116 row = row ,
111117 config_path = temp_config_path ,
112118 logger = config .logger ,
@@ -124,16 +130,16 @@ async def process_row(row: EvaluationRow) -> EvaluationRow:
124130 logger .info (f"Agent execution completed for row { row .execution_metadata .rollout_id } " )
125131
126132 # Step 4: Export sandbox data
127- dump_method = getattr (self .klavis_client .sandbox , f"dump_{ sandbox .server_name .value } _sandbox" )
128- dump_response = dump_method (sandbox_id = sandbox .sandbox_id )
133+ dump_method = getattr (self .klavis_client .sandbox , f"dump_{ sandbox .server_name .value } _sandbox" ) # pyright: ignore[reportOptionalMemberAccess]
134+ dump_response = dump_method (sandbox_id = sandbox .sandbox_id ) # pyright: ignore[reportOptionalMemberAccess]
129135 sandbox_data = dump_response .data
130136 logger .info (f"Sandbox data: { sandbox_data } " )
131137
132138 # Store sandbox data in row metadata for evaluation
133139 if not row .execution_metadata .extra :
134140 row .execution_metadata .extra = {}
135141 row .execution_metadata .extra ["sandbox_data" ] = sandbox_data
136- row .execution_metadata .extra ["sandbox_id" ] = sandbox .sandbox_id
142+ row .execution_metadata .extra ["sandbox_id" ] = sandbox .sandbox_id # pyright: ignore[reportOptionalMemberAccess]
137143 row .execution_metadata .extra ["server_name" ] = self .server_name
138144
139145 except Exception as e :
@@ -149,7 +155,7 @@ async def process_row(row: EvaluationRow) -> EvaluationRow:
149155 await agent .mcp_client .cleanup ()
150156 if temp_config_path and os .path .exists (temp_config_path ):
151157 os .unlink (temp_config_path )
152-
158+
153159 # Release sandbox
154160 if sandbox and sandbox .sandbox_id :
155161 try :
0 commit comments