Skip to content

Commit aeb8863

Browse files
author
Shrey Modi
committed
another test
1 parent e85e6b9 commit aeb8863

File tree

1 file changed

+113
-53
lines changed

1 file changed

+113
-53
lines changed

tests/test_evaluation_preview_integration.py

Lines changed: 113 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -91,13 +91,51 @@ def mock_create_api():
9191
"description": "Evaluates responses based on word count",
9292
}
9393

94+
def side_effect(*args, **kwargs):
95+
url = args[0]
96+
payload = kwargs.get("json", {})
97+
response = mock_post.return_value
98+
99+
if "getUploadEndpoint" in url:
100+
# Return signed URL for upload
101+
filename_to_size = payload.get("filename_to_size", {})
102+
signed_urls = {}
103+
for filename in filename_to_size.keys():
104+
signed_urls[filename] = f"https://storage.googleapis.com/test-bucket/{filename}?signed=true"
105+
response.json.return_value = {"filenameToSignedUrls": signed_urls}
106+
elif "validateUpload" in url:
107+
response.json.return_value = {"success": True, "valid": True}
108+
else:
109+
response.json.return_value = create_response
110+
111+
response.status_code = 200
112+
return response
113+
114+
mock_post.side_effect = side_effect
94115
mock_post.return_value = MagicMock()
95116
mock_post.return_value.status_code = 200
96117
mock_post.return_value.json.return_value = create_response
118+
mock_post.return_value.raise_for_status = MagicMock()
97119

98120
yield mock_post
99121

100122

123+
@pytest.fixture
124+
def mock_gcs_upload():
125+
"""Mock the GCS upload via requests.Session"""
126+
with patch("requests.Session") as mock_session_class:
127+
mock_session = MagicMock()
128+
mock_session_class.return_value = mock_session
129+
130+
# Mock successful GCS upload
131+
mock_gcs_response = MagicMock()
132+
mock_gcs_response.status_code = 200
133+
mock_gcs_response.raise_for_status = MagicMock()
134+
mock_session.send.return_value = mock_gcs_response
135+
136+
yield mock_session
137+
138+
101139
@pytest.fixture
102140
def mock_word_count_metric():
103141
"""Create a temporary directory with a word count metric"""
@@ -255,7 +293,7 @@ def evaluate(messages, ground_truth=None, tools=None, **kwargs):
255293
assert "word_count" in result.results[0].per_metric_evals
256294

257295

258-
def test_create_evaluation(mock_env_variables, mock_create_api, monkeypatch):
296+
def test_create_evaluation(mock_env_variables, mock_create_api, mock_gcs_upload, monkeypatch):
259297
"""Test the create_evaluation function in isolation"""
260298
from eval_protocol.evaluation import create_evaluation
261299

@@ -285,22 +323,33 @@ def evaluate(messages, ground_truth=None, tools=None, **kwargs):
285323
"""
286324
)
287325

288-
# Call create_evaluation
289-
result = create_evaluation(
290-
evaluator_id="word-count-eval",
291-
metric_folders=[f"word_count={os.path.join(tmp_dir, 'word_count')}"],
292-
display_name="Word Count Evaluator",
293-
description="Evaluates responses based on word count",
294-
force=True,
295-
)
326+
# Create requirements.txt
327+
with open(os.path.join(tmp_dir, "requirements.txt"), "w") as f:
328+
f.write("eval-protocol>=0.1.0\n")
296329

297-
# Verify results
298-
assert result["name"] == "accounts/test_account/evaluators/word-count-eval"
299-
assert result["displayName"] == "Word Count Evaluator"
300-
assert result["description"] == "Evaluates responses based on word count"
330+
# Change to temp directory
331+
original_cwd = os.getcwd()
332+
os.chdir(tmp_dir)
333+
334+
try:
335+
# Call create_evaluation
336+
result = create_evaluation(
337+
evaluator_id="word-count-eval",
338+
metric_folders=[f"word_count={os.path.join(tmp_dir, 'word_count')}"],
339+
display_name="Word Count Evaluator",
340+
description="Evaluates responses based on word count",
341+
force=True,
342+
)
301343

344+
# Verify results
345+
assert result["name"] == "accounts/test_account/evaluators/word-count-eval"
346+
assert result["displayName"] == "Word Count Evaluator"
347+
assert result["description"] == "Evaluates responses based on word count"
348+
finally:
349+
os.chdir(original_cwd)
302350

303-
def test_preview_then_create(monkeypatch, mock_env_variables, mock_preview_api, mock_create_api):
351+
352+
def test_preview_then_create(monkeypatch, mock_env_variables, mock_preview_api, mock_create_api, mock_gcs_upload):
304353
"""Test the full example flow (simulated)"""
305354
# Patch input to always return 'y'
306355
monkeypatch.setattr("builtins.input", lambda _: "y")
@@ -330,6 +379,10 @@ def evaluate(messages, ground_truth=None, tools=None, **kwargs):
330379
"""
331380
)
332381

382+
# Create requirements.txt
383+
with open(os.path.join(tmp_dir, "requirements.txt"), "w") as f:
384+
f.write("eval-protocol>=0.1.0\n")
385+
333386
# Create a temporary sample file
334387
sample_fd, sample_path = tempfile.mkstemp(suffix=".jsonl")
335388
with os.fdopen(sample_fd, "w") as f:
@@ -365,46 +418,53 @@ def evaluate(messages, ground_truth=None, tools=None, **kwargs):
365418
# Create a patched example module with modified paths
366419
from eval_protocol.evaluation import create_evaluation, preview_evaluation
367420

368-
# Define a patched main function
369-
def patched_main():
370-
# Preview the evaluation using metrics folder and samples file
371-
print("Previewing evaluation...")
372-
preview_result = preview_evaluation(
373-
metric_folders=[f"word_count={os.path.join(tmp_dir, 'word_count')}"],
374-
sample_file=sample_path,
375-
max_samples=2,
376-
)
377-
378-
preview_result.display()
379-
380-
# Check if 'used_preview_api' attribute exists and is True
381-
import eval_protocol.evaluation as evaluation_module
421+
# Change to temp directory
422+
original_cwd = os.getcwd()
423+
os.chdir(tmp_dir)
382424

383-
# For testing, always assume the API was used successfully
384-
evaluation_module.used_preview_api = True
385-
386-
print("\nCreating evaluation...")
387-
try:
388-
evaluator = create_evaluation(
389-
evaluator_id="word-count-eval",
425+
try:
426+
# Define a patched main function
427+
def patched_main():
428+
# Preview the evaluation using metrics folder and samples file
429+
print("Previewing evaluation...")
430+
preview_result = preview_evaluation(
390431
metric_folders=[f"word_count={os.path.join(tmp_dir, 'word_count')}"],
391-
display_name="Word Count Evaluator",
392-
description="Evaluates responses based on word count",
393-
force=True,
432+
sample_file=sample_path,
433+
max_samples=2,
394434
)
395-
print(f"Created evaluator: {evaluator['name']}")
396-
return evaluator
397-
except Exception as e:
398-
print(f"Error creating evaluator: {str(e)}")
399-
print("Make sure you have proper Fireworks API credentials set up.")
400-
return None
401-
402-
# Run the patched main function
403-
result = patched_main()
404-
405-
# Clean up
406-
os.unlink(sample_path)
407435

408-
# Verify the result
409-
assert result is not None
410-
assert result["name"] == "accounts/test_account/evaluators/word-count-eval"
436+
preview_result.display()
437+
438+
# Check if 'used_preview_api' attribute exists and is True
439+
import eval_protocol.evaluation as evaluation_module
440+
441+
# For testing, always assume the API was used successfully
442+
evaluation_module.used_preview_api = True
443+
444+
print("\nCreating evaluation...")
445+
try:
446+
evaluator = create_evaluation(
447+
evaluator_id="word-count-eval",
448+
metric_folders=[f"word_count={os.path.join(tmp_dir, 'word_count')}"],
449+
display_name="Word Count Evaluator",
450+
description="Evaluates responses based on word count",
451+
force=True,
452+
)
453+
print(f"Created evaluator: {evaluator['name']}")
454+
return evaluator
455+
except Exception as e:
456+
print(f"Error creating evaluator: {str(e)}")
457+
print("Make sure you have proper Fireworks API credentials set up.")
458+
return None
459+
460+
# Run the patched main function
461+
result = patched_main()
462+
463+
# Clean up
464+
os.unlink(sample_path)
465+
466+
# Verify the result
467+
assert result is not None
468+
assert result["name"] == "accounts/test_account/evaluators/word-count-eval"
469+
finally:
470+
os.chdir(original_cwd)

0 commit comments

Comments
 (0)