Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
189 changes: 101 additions & 88 deletions eval_protocol/pytest/evaluation_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,99 +331,112 @@ def get_auth_value(key):
fireworks_api_key = get_auth_value("FIREWORKS_API_KEY")
fireworks_account_id = get_auth_value("FIREWORKS_ACCOUNT_ID")

if fireworks_api_key and fireworks_account_id:
headers = {"Authorization": f"Bearer {fireworks_api_key}", "Content-Type": "application/json"}

# Make dataset first
dataset_url = f"https://api.fireworks.ai/v1/accounts/{fireworks_account_id}/datasets"

dataset_payload = {
"dataset": {
"displayName": dataset_name,
"evalProtocol": {},
"format": "FORMAT_UNSPECIFIED",
"exampleCount": f"{len(exp_rows)}",
},
"datasetId": dataset_name,
}

dataset_response = requests.post(dataset_url, json=dataset_payload, headers=headers)

# Skip if dataset creation failed
if dataset_response.status_code not in [200, 201]:
_store_experiment_link(
experiment_id,
f"Dataset creation failed: {dataset_response.status_code} {dataset_response.text}",
"failure",
)
continue
if not (fireworks_api_key and fireworks_account_id):
_store_experiment_link(
experiment_id,
"No Fireworks API key AND account ID found",
"failure",
)
continue
elif not fireworks_api_key:
_store_experiment_link(
experiment_id,
"No Fireworks API key found",
"failure",
)
continue
elif not fireworks_account_id:
_store_experiment_link(
experiment_id,
"No Fireworks account ID found",
"failure",
)
continue

headers = {"Authorization": f"Bearer {fireworks_api_key}", "Content-Type": "application/json"}

# Make dataset first
dataset_url = f"https://api.fireworks.ai/v1/accounts/{fireworks_account_id}/datasets"

dataset_payload = {
"dataset": {
"displayName": dataset_name,
"evalProtocol": {},
"format": "FORMAT_UNSPECIFIED",
"exampleCount": f"{len(exp_rows)}",
},
"datasetId": dataset_name,
}

dataset_response = requests.post(dataset_url, json=dataset_payload, headers=headers)

# Skip if dataset creation failed
if dataset_response.status_code not in [200, 201]:
_store_experiment_link(
experiment_id,
f"Dataset creation failed: {dataset_response.status_code} {dataset_response.text}",
"failure",
)
continue

dataset_data = dataset_response.json()
dataset_id = dataset_data.get("datasetId", dataset_name)
dataset_data = dataset_response.json()
dataset_id = dataset_data.get("datasetId", dataset_name)

# Upload the JSONL file content
upload_url = (
f"https://api.fireworks.ai/v1/accounts/{fireworks_account_id}/datasets/{dataset_id}:upload"
# Upload the JSONL file content
upload_url = (
f"https://api.fireworks.ai/v1/accounts/{fireworks_account_id}/datasets/{dataset_id}:upload"
)
upload_headers = {"Authorization": f"Bearer {fireworks_api_key}"}

with open(exp_file, "rb") as f:
files = {"file": f}
upload_response = requests.post(upload_url, files=files, headers=upload_headers)

# Skip if upload failed
if upload_response.status_code not in [200, 201]:
_store_experiment_link(
experiment_id,
f"File upload failed: {upload_response.status_code} {upload_response.text}",
"failure",
)
upload_headers = {"Authorization": f"Bearer {fireworks_api_key}"}

with open(exp_file, "rb") as f:
files = {"file": f}
upload_response = requests.post(upload_url, files=files, headers=upload_headers)

# Skip if upload failed
if upload_response.status_code not in [200, 201]:
_store_experiment_link(
experiment_id,
f"File upload failed: {upload_response.status_code} {upload_response.text}",
"failure",
)
continue

# Create evaluation job (optional - don't skip experiment if this fails)
eval_job_url = f"https://api.fireworks.ai/v1/accounts/{fireworks_account_id}/evaluationJobs"
# Truncate job ID to fit 63 character limit
job_id_base = f"{dataset_name}-job"
if len(job_id_base) > 63:
# Keep the "-job" suffix and truncate the dataset_name part
max_dataset_name_len = 63 - 4 # 4 = len("-job")
truncated_dataset_name = dataset_name[:max_dataset_name_len]
job_id_base = f"{truncated_dataset_name}-job"

eval_job_payload = {
"evaluationJobId": job_id_base,
"evaluationJob": {
"evaluator": f"accounts/{fireworks_account_id}/evaluators/dummy",
"inputDataset": f"accounts/{fireworks_account_id}/datasets/dummy",
"outputDataset": f"accounts/{fireworks_account_id}/datasets/{dataset_id}",
},
}

eval_response = requests.post(eval_job_url, json=eval_job_payload, headers=headers)

if eval_response.status_code in [200, 201]:
eval_job_data = eval_response.json()
job_id = eval_job_data.get("evaluationJobId", job_id_base)

_store_experiment_link(
experiment_id,
f"https://app.fireworks.ai/dashboard/evaluation-jobs/{job_id}",
"success",
)
else:
_store_experiment_link(
experiment_id,
f"Job creation failed: {eval_response.status_code} {eval_response.text}",
"failure",
)
continue

# Create evaluation job (optional - don't skip experiment if this fails)
eval_job_url = f"https://api.fireworks.ai/v1/accounts/{fireworks_account_id}/evaluationJobs"
# Truncate job ID to fit 63 character limit
job_id_base = f"{dataset_name}-job"
if len(job_id_base) > 63:
# Keep the "-job" suffix and truncate the dataset_name part
max_dataset_name_len = 63 - 4 # 4 = len("-job")
truncated_dataset_name = dataset_name[:max_dataset_name_len]
job_id_base = f"{truncated_dataset_name}-job"

eval_job_payload = {
"evaluationJobId": job_id_base,
"evaluationJob": {
"evaluator": f"accounts/{fireworks_account_id}/evaluators/dummy",
"inputDataset": f"accounts/{fireworks_account_id}/datasets/dummy",
"outputDataset": f"accounts/{fireworks_account_id}/datasets/{dataset_id}",
},
}

eval_response = requests.post(eval_job_url, json=eval_job_payload, headers=headers)

if eval_response.status_code in [200, 201]:
eval_job_data = eval_response.json()
job_id = eval_job_data.get("evaluationJobId", job_id_base)

_store_experiment_link(
experiment_id,
f"https://app.fireworks.ai/dashboard/evaluation-jobs/{job_id}",
"success",
)
else:
# Store failure for missing credentials for all experiments
for experiment_id, exp_rows in experiments.items():
if experiment_id and exp_rows:
_store_experiment_link(
experiment_id, "No Fireworks API key or account ID found", "failure"
)
_store_experiment_link(
experiment_id,
f"Job creation failed: {eval_response.status_code} {eval_response.text}",
"failure",
)

except Exception as e:
# Do not fail evaluation if experiment JSONL writing fails
Expand Down
Loading