diff --git a/eval_protocol/pytest/evaluation_test.py b/eval_protocol/pytest/evaluation_test.py index b82a60ab..115c1ad4 100644 --- a/eval_protocol/pytest/evaluation_test.py +++ b/eval_protocol/pytest/evaluation_test.py @@ -331,99 +331,112 @@ def get_auth_value(key): fireworks_api_key = get_auth_value("FIREWORKS_API_KEY") fireworks_account_id = get_auth_value("FIREWORKS_ACCOUNT_ID") - if fireworks_api_key and fireworks_account_id: - headers = {"Authorization": f"Bearer {fireworks_api_key}", "Content-Type": "application/json"} - - # Make dataset first - dataset_url = f"https://api.fireworks.ai/v1/accounts/{fireworks_account_id}/datasets" - - dataset_payload = { - "dataset": { - "displayName": dataset_name, - "evalProtocol": {}, - "format": "FORMAT_UNSPECIFIED", - "exampleCount": f"{len(exp_rows)}", - }, - "datasetId": dataset_name, - } - - dataset_response = requests.post(dataset_url, json=dataset_payload, headers=headers) - - # Skip if dataset creation failed - if dataset_response.status_code not in [200, 201]: - _store_experiment_link( - experiment_id, - f"Dataset creation failed: {dataset_response.status_code} {dataset_response.text}", - "failure", - ) - continue + if not (fireworks_api_key and fireworks_account_id): + _store_experiment_link( + experiment_id, + "No Fireworks API key AND account ID found", + "failure", + ) + continue + elif not fireworks_api_key: + _store_experiment_link( + experiment_id, + "No Fireworks API key found", + "failure", + ) + continue + elif not fireworks_account_id: + _store_experiment_link( + experiment_id, + "No Fireworks account ID found", + "failure", + ) + continue + + headers = {"Authorization": f"Bearer {fireworks_api_key}", "Content-Type": "application/json"} + + # Make dataset first + dataset_url = f"https://api.fireworks.ai/v1/accounts/{fireworks_account_id}/datasets" + + dataset_payload = { + "dataset": { + "displayName": dataset_name, + "evalProtocol": {}, + "format": "FORMAT_UNSPECIFIED", + "exampleCount": f"{len(exp_rows)}", + }, + "datasetId": dataset_name, + } + + dataset_response = requests.post(dataset_url, json=dataset_payload, headers=headers) + + # Skip if dataset creation failed + if dataset_response.status_code not in [200, 201]: + _store_experiment_link( + experiment_id, + f"Dataset creation failed: {dataset_response.status_code} {dataset_response.text}", + "failure", + ) + continue - dataset_data = dataset_response.json() - dataset_id = dataset_data.get("datasetId", dataset_name) + dataset_data = dataset_response.json() + dataset_id = dataset_data.get("datasetId", dataset_name) - # Upload the JSONL file content - upload_url = ( - f"https://api.fireworks.ai/v1/accounts/{fireworks_account_id}/datasets/{dataset_id}:upload" + # Upload the JSONL file content + upload_url = ( + f"https://api.fireworks.ai/v1/accounts/{fireworks_account_id}/datasets/{dataset_id}:upload" + ) + upload_headers = {"Authorization": f"Bearer {fireworks_api_key}"} + + with open(exp_file, "rb") as f: + files = {"file": f} + upload_response = requests.post(upload_url, files=files, headers=upload_headers) + + # Skip if upload failed + if upload_response.status_code not in [200, 201]: + _store_experiment_link( + experiment_id, + f"File upload failed: {upload_response.status_code} {upload_response.text}", + "failure", ) - upload_headers = {"Authorization": f"Bearer {fireworks_api_key}"} - - with open(exp_file, "rb") as f: - files = {"file": f} - upload_response = requests.post(upload_url, files=files, headers=upload_headers) - - # Skip if upload failed - if upload_response.status_code not in [200, 201]: - _store_experiment_link( - experiment_id, - f"File upload failed: {upload_response.status_code} {upload_response.text}", - "failure", - ) - continue - - # Create evaluation job (optional - don't skip experiment if this fails) - eval_job_url = f"https://api.fireworks.ai/v1/accounts/{fireworks_account_id}/evaluationJobs" - # Truncate job ID to fit 63 character limit - job_id_base = f"{dataset_name}-job" - if len(job_id_base) > 63: - # Keep the "-job" suffix and truncate the dataset_name part - max_dataset_name_len = 63 - 4 # 4 = len("-job") - truncated_dataset_name = dataset_name[:max_dataset_name_len] - job_id_base = f"{truncated_dataset_name}-job" - - eval_job_payload = { - "evaluationJobId": job_id_base, - "evaluationJob": { - "evaluator": f"accounts/{fireworks_account_id}/evaluators/dummy", - "inputDataset": f"accounts/{fireworks_account_id}/datasets/dummy", - "outputDataset": f"accounts/{fireworks_account_id}/datasets/{dataset_id}", - }, - } - - eval_response = requests.post(eval_job_url, json=eval_job_payload, headers=headers) - - if eval_response.status_code in [200, 201]: - eval_job_data = eval_response.json() - job_id = eval_job_data.get("evaluationJobId", job_id_base) - - _store_experiment_link( - experiment_id, - f"https://app.fireworks.ai/dashboard/evaluation-jobs/{job_id}", - "success", - ) - else: - _store_experiment_link( - experiment_id, - f"Job creation failed: {eval_response.status_code} {eval_response.text}", - "failure", - ) + continue + # Create evaluation job (optional - don't skip experiment if this fails) + eval_job_url = f"https://api.fireworks.ai/v1/accounts/{fireworks_account_id}/evaluationJobs" + # Truncate job ID to fit 63 character limit + job_id_base = f"{dataset_name}-job" + if len(job_id_base) > 63: + # Keep the "-job" suffix and truncate the dataset_name part + max_dataset_name_len = 63 - 4 # 4 = len("-job") + truncated_dataset_name = dataset_name[:max_dataset_name_len] + job_id_base = f"{truncated_dataset_name}-job" + + eval_job_payload = { + "evaluationJobId": job_id_base, + "evaluationJob": { + "evaluator": f"accounts/{fireworks_account_id}/evaluators/dummy", + "inputDataset": f"accounts/{fireworks_account_id}/datasets/dummy", + "outputDataset": f"accounts/{fireworks_account_id}/datasets/{dataset_id}", + }, + } + + eval_response = requests.post(eval_job_url, json=eval_job_payload, headers=headers) + + if eval_response.status_code in [200, 201]: + eval_job_data = eval_response.json() + job_id = eval_job_data.get("evaluationJobId", job_id_base) + + _store_experiment_link( + experiment_id, + f"https://app.fireworks.ai/dashboard/evaluation-jobs/{job_id}", + "success", + ) else: - # Store failure for missing credentials for all experiments - for experiment_id, exp_rows in experiments.items(): - if experiment_id and exp_rows: - _store_experiment_link( - experiment_id, "No Fireworks API key or account ID found", "failure" - ) + _store_experiment_link( + experiment_id, + f"Job creation failed: {eval_response.status_code} {eval_response.text}", + "failure", + ) except Exception as e: # Do not fail evaluation if experiment JSONL writing fails