Skip to content

Commit 377e7e9

Browse files
author
Shrey Modi
committed
retry gcs logic
1 parent 8382181 commit 377e7e9

File tree

1 file changed

+64
-32
lines changed

1 file changed

+64
-32
lines changed

eval_protocol/evaluation.py

Lines changed: 64 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -388,7 +388,7 @@ def preview(self, sample_file, max_samples=5):
388388
}
389389

390390
api_base = os.environ.get("FIREWORKS_API_BASE", "https://api.fireworks.ai")
391-
print("show payload", payload)
391+
392392
if "dev.api.fireworks.ai" in api_base and account_id == "fireworks":
393393
account_id = "pyroworks-dev"
394394

@@ -662,8 +662,6 @@ def create(self, evaluator_id, display_name=None, description=None, force=False)
662662
payload_rollup_settings = {"skipRollup": True}
663663
parent = f"accounts/{account_id}"
664664

665-
version_str = None
666-
667665
try:
668666
version_str = get_pep440_version()
669667
except Exception:
@@ -676,7 +674,6 @@ def create(self, evaluator_id, display_name=None, description=None, force=False)
676674
"description": self.description,
677675
"multiMetrics": payload_multi_metrics,
678676
"commitHash": version_str,
679-
# "rewardFunctionMode": self.reward_function_mode, # How input is processed by user func
680677
"criteria": self._build_minimal_criteria(),
681678
"requirements": "",
682679
"rollupSettings": payload_rollup_settings,
@@ -711,15 +708,14 @@ def create(self, evaluator_id, display_name=None, description=None, force=False)
711708

712709
try:
713710
if force:
714-
base_url_2 = f"{self.api_base}/v1/{parent}/evaluators"
715-
check_url = f"{base_url_2}/{evaluator_id}"
711+
check_url = f"{self.api_base}/v1/{parent}/evaluators/{evaluator_id}"
716712
try:
717-
logger.info(f"check_url: {check_url}, headers: {headers}")
713+
logger.info(f"Checking if evaluator exists: {check_url}")
718714
check_response = requests.get(check_url, headers=headers)
719715

720716
if check_response.status_code == 200:
721717
logger.info(f"Evaluator '{evaluator_id}' already exists, deleting and recreating...")
722-
delete_url = f"{base_url_2}/{evaluator_id}"
718+
delete_url = f"{self.api_base}/v1/{parent}/evaluators/{evaluator_id}"
723719
try:
724720
delete_response = requests.delete(delete_url, headers=headers)
725721
if delete_response.status_code < 400:
@@ -730,15 +726,13 @@ def create(self, evaluator_id, display_name=None, description=None, force=False)
730726
)
731727
except Exception as e_del:
732728
logger.warning(f"Error deleting evaluator: {str(e_del)}")
733-
logger.info(f"base_url: {base_url_2}, payload_data: {payload_data}, headers: {headers}")
734729
response = requests.post(base_url, json=payload_data, headers=headers)
735730
else:
736-
print(f"base_url: {base_url_2}, payload_data: {payload_data}, headers: {headers}")
737731
response = requests.post(base_url, json=payload_data, headers=headers)
738732
except requests.exceptions.RequestException:
739-
response = requests.post(base_url_2, json=payload_data, headers=headers)
733+
response = requests.post(base_url, json=payload_data, headers=headers)
740734
else:
741-
logger.info(f"check_url: {base_url}, headers: {headers}, payload_data: {payload_data}")
735+
logger.info(f"Creating evaluator at: {base_url}")
742736
response = requests.post(base_url, json=payload_data, headers=headers)
743737

744738
response.raise_for_status()
@@ -776,25 +770,64 @@ def create(self, evaluator_id, display_name=None, description=None, force=False)
776770

777771
file_size = os.path.getsize(tar_path)
778772

779-
with open(tar_path, "rb") as f:
780-
# Create request exactly like Golang
781-
req = requests.Request(
782-
"PUT",
783-
signed_url,
784-
data=f,
785-
headers={
786-
"Content-Type": "application/octet-stream",
787-
"X-Goog-Content-Length-Range": f"{file_size},{file_size}",
788-
},
789-
)
790-
prepared = req.prepare()
791-
792-
# Don't let requests add extra headers
793-
session = requests.Session()
794-
gcs_response = session.send(prepared, timeout=600)
795-
gcs_response.raise_for_status()
796-
797-
logger.info(f"Successfully uploaded {tar_filename}")
773+
# Retry configuration
774+
max_retries = 3
775+
retry_delay = 2 # seconds
776+
777+
for attempt in range(max_retries):
778+
try:
779+
with open(tar_path, "rb") as f:
780+
# Create request exactly like Golang
781+
req = requests.Request(
782+
"PUT",
783+
signed_url,
784+
data=f,
785+
headers={
786+
"Content-Type": "application/octet-stream",
787+
"X-Goog-Content-Length-Range": f"{file_size},{file_size}",
788+
},
789+
)
790+
prepared = req.prepare()
791+
792+
# Don't let requests add extra headers
793+
session = requests.Session()
794+
gcs_response = session.send(prepared, timeout=600)
795+
gcs_response.raise_for_status()
796+
797+
logger.info(f"Successfully uploaded {tar_filename}")
798+
break # Success, exit retry loop
799+
800+
except (requests.exceptions.RequestException, IOError) as e:
801+
if attempt < max_retries - 1:
802+
# Check if it's a retryable error
803+
is_retryable = False
804+
if isinstance(e, requests.exceptions.RequestException):
805+
if hasattr(e, "response") and e.response is not None:
806+
# Retry on 5xx errors or 408 (timeout)
807+
is_retryable = (
808+
e.response.status_code >= 500 or e.response.status_code == 408
809+
)
810+
else:
811+
# Network errors (no response) are retryable
812+
is_retryable = True
813+
else:
814+
# IOError is retryable
815+
is_retryable = True
816+
817+
if is_retryable:
818+
wait_time = retry_delay * (2**attempt) # Exponential backoff
819+
logger.warning(
820+
f"Upload attempt {attempt + 1}/{max_retries} failed: {e}. "
821+
f"Retrying in {wait_time}s..."
822+
)
823+
time.sleep(wait_time)
824+
else:
825+
# Non-retryable error, raise immediately
826+
raise
827+
else:
828+
# Last attempt failed
829+
logger.error(f"Upload failed after {max_retries} attempts")
830+
raise
798831

799832
# Step 3: Validate upload
800833
validate_url = f"{self.api_base}/v1/{evaluator_name}:validateUpload"
@@ -812,7 +845,6 @@ def create(self, evaluator_id, display_name=None, description=None, force=False)
812845
logger.warning(f"Code upload failed (evaluator created but code not uploaded): {upload_error}")
813846
# Don't fail - evaluator is created, just code upload failed
814847

815-
# return result # OLD: Direct return
816848
return result # Return after attempting upload
817849
except Exception as e:
818850
logger.error(f"Error creating evaluator: {str(e)}")

0 commit comments

Comments
 (0)