77import time
88from typing import Any , Callable , Dict , Optional
99import inspect
10- import requests
1110import tempfile
1211from pydantic import ValidationError
1312
1413from ..auth import get_fireworks_api_base , get_fireworks_api_key
1514from ..fireworks_client import create_fireworks_client
16- from ..common_utils import get_user_agent , load_jsonl
15+ from ..common_utils import load_jsonl
1716from ..fireworks_rft import (
1817 create_dataset_from_jsonl ,
1918 detect_dataset_builder ,
2019 materialize_dataset_via_builder ,
2120)
2221from ..models import EvaluationRow
23- from .upload import upload_command
2422from .utils import (
2523 _build_entry_point ,
2624 _build_trimmed_dataset_id ,
@@ -222,64 +220,68 @@ def _extract_jsonl_from_input_dataset(test_file_path: str, test_func_name: str)
222220 return None
223221
224222
def _poll_evaluator_version_status(
    evaluator_id: str,
    version_id: str,
    api_key: str,
    api_base: str,
    timeout_minutes: int = 10,
    poll_interval_seconds: float = 10,
) -> bool:
    """
    Poll a specific evaluator version status until it becomes ACTIVE or times out.

    Uses the Fireworks SDK to get the specified version of the evaluator and checks
    its build state.

    Args:
        evaluator_id: The evaluator ID (not full resource name)
        version_id: The specific version ID to poll
        api_key: Fireworks API key
        api_base: Fireworks API base URL
        timeout_minutes: Maximum time to wait in minutes
        poll_interval_seconds: Delay between two consecutive status checks, in seconds

    Returns:
        True if evaluator version becomes ACTIVE, False if timeout or BUILD_FAILED
    """
    timeout_seconds = timeout_minutes * 60
    # Use the monotonic clock: wall-clock time can jump (NTP sync, manual changes)
    # and would make the timeout fire too early or too late.
    start_time = time.monotonic()
    deadline = start_time + timeout_seconds

    print(
        f"Polling evaluator version '{version_id}' status "
        f"(timeout: {timeout_minutes}m, interval: {poll_interval_seconds}s)..."
    )

    client = create_fireworks_client(api_key=api_key, base_url=api_base)

    while time.monotonic() < deadline:
        try:
            version = client.evaluator_versions.get(version_id, evaluator_id=evaluator_id)
            state = version.state or "STATE_UNSPECIFIED"
            status_msg = ""
            if version.status and version.status.message:
                status_msg = version.status.message

            if state == "ACTIVE":
                print("✅ Evaluator version is ACTIVE and ready!")
                return True
            if state == "BUILD_FAILED":
                print(f"❌ Evaluator version build failed. Status: {status_msg}")
                return False
            if state == "BUILDING":
                elapsed_minutes = (time.monotonic() - start_time) / 60
                print(f"⏳ Evaluator version is still building... ({elapsed_minutes:.1f}m elapsed)")
            else:
                print(f"⏳ Evaluator version state: {state}, status: {status_msg}")

        except Exception as e:
            # Deliberately best-effort: a transient SDK/network failure must not abort
            # the wait; report it and retry on the next poll.
            print(f"Warning: Failed to check evaluator version status: {e}")

        # Wait before next poll, but never sleep past the deadline.
        remaining_seconds = deadline - time.monotonic()
        if remaining_seconds <= 0:
            break
        time.sleep(min(poll_interval_seconds, remaining_seconds))

    # Timeout reached
    elapsed_minutes = (time.monotonic() - start_time) / 60
    print(f"⏰ Timeout after {elapsed_minutes:.1f}m - evaluator version is not yet ACTIVE")
    return False
284286
285287
@@ -564,40 +566,16 @@ def _upload_dataset(
564566def _upload_and_ensure_evaluator (
565567 project_root : str ,
566568 evaluator_id : str ,
567- evaluator_resource_name : str ,
568569 api_key : str ,
569570 api_base : str ,
570571) -> bool :
571- """Ensure the evaluator exists and is ACTIVE, uploading it if needed."""
572- # Check if evaluator already exists
573- try :
574- headers = {
575- "Authorization" : f"Bearer { api_key } " ,
576- "Content-Type" : "application/json" ,
577- "User-Agent" : get_user_agent (),
578- }
579- resp = requests .get (f"{ api_base } /v1/{ evaluator_resource_name } " , headers = headers , timeout = 10 )
580- if resp .ok :
581- state = resp .json ().get ("state" , "STATE_UNSPECIFIED" )
582- print (f"✓ Evaluator exists (state: { state } ). Skipping upload." )
583- # Poll for ACTIVE before proceeding
584- print (f"Waiting for evaluator '{ evaluator_id } ' to become ACTIVE..." )
585- if not _poll_evaluator_status (
586- evaluator_resource_name = evaluator_resource_name ,
587- api_key = api_key ,
588- api_base = api_base ,
589- timeout_minutes = 10 ,
590- ):
591- dashboard_url = _build_evaluator_dashboard_url (evaluator_id )
592- print ("\n ❌ Evaluator is not ready within the timeout period." )
593- print (f"📊 Please check the evaluator status at: { dashboard_url } " )
594- print (" Wait for it to become ACTIVE, then run 'eval-protocol create rft' again." )
595- return False
596- return True
597- except requests .exceptions .RequestException :
598- pass
572+ """Upload evaluator and ensure its version becomes ACTIVE.
573+
574+ Creates/updates the evaluator and uploads the code, then polls the specific
575+ version until it becomes ACTIVE.
576+ """
577+ from eval_protocol .evaluation import create_evaluation
599578
600- # Ensure evaluator exists by invoking the upload flow programmatically
601579 try :
602580 tests = _discover_tests (project_root )
603581 selected_entry : Optional [str ] = None
@@ -614,39 +592,37 @@ def _upload_and_ensure_evaluator(
614592 )
615593 return False
616594
617- upload_args = argparse .Namespace (
618- path = project_root ,
619- entry = selected_entry ,
620- id = evaluator_id ,
621- display_name = None ,
622- description = None ,
623- yes = True ,
624- env_file = None ,
595+ print (f"\n Uploading evaluator '{ evaluator_id } '..." )
596+ result , version_id = create_evaluation (
597+ evaluator_id = evaluator_id ,
598+ display_name = evaluator_id ,
599+ description = f"Evaluator for { evaluator_id } " ,
600+ entry_point = selected_entry ,
625601 )
626602
627- rc = upload_command (upload_args )
628- if rc == 0 :
629- print (f"✓ Uploaded/ensured evaluator: { evaluator_id } " )
630-
631- # Poll for evaluator status
632- print (f"Waiting for evaluator '{ evaluator_id } ' to become ACTIVE..." )
633- is_active = _poll_evaluator_status (
634- evaluator_resource_name = evaluator_resource_name ,
635- api_key = api_key ,
636- api_base = api_base ,
637- timeout_minutes = 10 ,
638- )
603+ if not version_id :
604+ print ("Warning: Evaluator created but version upload failed." )
605+ return False
639606
640- if not is_active :
641- dashboard_url = _build_evaluator_dashboard_url (evaluator_id )
642- print ("\n ❌ Evaluator is not ready within the timeout period." )
643- print (f"📊 Please check the evaluator status at: { dashboard_url } " )
644- print (" Wait for it to become ACTIVE, then run 'eval-protocol create rft' again." )
645- return False
646- return True
647- else :
648- print ("Warning: Evaluator upload did not complete successfully; proceeding to RFT creation." )
607+ print (f"✓ Uploaded evaluator: { evaluator_id } (version: { version_id } )" )
608+
609+ # Poll for the specific evaluator version status
610+ print (f"Waiting for evaluator '{ evaluator_id } ' version '{ version_id } ' to become ACTIVE..." )
611+ is_active = _poll_evaluator_version_status (
612+ evaluator_id = evaluator_id ,
613+ version_id = version_id ,
614+ api_key = api_key ,
615+ api_base = api_base ,
616+ timeout_minutes = 10 ,
617+ )
618+
619+ if not is_active :
620+ dashboard_url = _build_evaluator_dashboard_url (evaluator_id )
621+ print ("\n ❌ Evaluator version is not ready within the timeout period." )
622+ print (f"📊 Please check the evaluator status at: { dashboard_url } " )
623+ print (" Wait for it to become ACTIVE, then run 'eval-protocol create rft' again." )
649624 return False
625+ return True
650626 except Exception as e :
651627 print (f"Warning: Failed to upload evaluator automatically: { e } " )
652628 return False
@@ -802,11 +778,10 @@ def create_rft_command(args) -> int:
802778 if not dataset_id or not dataset_resource :
803779 return 1
804780
805- # 5) Ensure evaluator exists and is ACTIVE (upload + poll if needed)
781+ # 5) Ensure evaluator exists and its latest version is ACTIVE (upload + poll if needed)
806782 if not _upload_and_ensure_evaluator (
807783 project_root = project_root ,
808784 evaluator_id = evaluator_id ,
809- evaluator_resource_name = evaluator_resource_name ,
810785 api_key = api_key ,
811786 api_base = api_base ,
812787 ):
0 commit comments