@@ -91,13 +91,51 @@ def mock_create_api():
9191 "description" : "Evaluates responses based on word count" ,
9292 }
9393
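+        # Dispatch canned responses by request URL so a single mock covers the
+        # signed-URL, validation, and evaluator-creation endpoints.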
+        def side_effect(*args, **kwargs):
+            url = args[0]
+            payload = kwargs.get("json", {})
+            response = mock_post.return_value
+
+            if "getUploadEndpoint" in url:
+                # Return signed URL for upload
+                filename_to_size = payload.get("filename_to_size", {})
+                signed_urls = {}
+                for filename in filename_to_size.keys():
+                    signed_urls[filename] = f"https://storage.googleapis.com/test-bucket/{filename}?signed=true"
+                response.json.return_value = {"filenameToSignedUrls": signed_urls}
+            elif "validateUpload" in url:
+                response.json.return_value = {"success": True, "valid": True}
+            else:
+                response.json.return_value = create_response
+
+            response.status_code = 200
+            return response
+
+        mock_post.side_effect = side_effect
         mock_post.return_value = MagicMock()
         mock_post.return_value.status_code = 200
         mock_post.return_value.json.return_value = create_response
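+        # Explicit no-op so client calls to raise_for_status() never raise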
+        mock_post.return_value.raise_for_status = MagicMock()

         yield mock_post


+@pytest.fixture
+def mock_gcs_upload():
+    """Mock the GCS upload via requests.Session"""
+    with patch("requests.Session") as mock_session_class:
+        mock_session = MagicMock()
+        mock_session_class.return_value = mock_session
+
+        # Mock successful GCS upload
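+        # (the upload path is assumed to call Session.send() with a prepared request)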
+        mock_gcs_response = MagicMock()
+        mock_gcs_response.status_code = 200
+        mock_gcs_response.raise_for_status = MagicMock()
+        mock_session.send.return_value = mock_gcs_response
+
+        yield mock_session
+
+
 @pytest.fixture
 def mock_word_count_metric():
     """Create a temporary directory with a word count metric"""
@@ -255,7 +293,7 @@ def evaluate(messages, ground_truth=None, tools=None, **kwargs):
     assert "word_count" in result.results[0].per_metric_evals


-def test_create_evaluation(mock_env_variables, mock_create_api, monkeypatch):
+def test_create_evaluation(mock_env_variables, mock_create_api, mock_gcs_upload, monkeypatch):
259297 """Test the create_evaluation function in isolation"""
260298 from eval_protocol .evaluation import create_evaluation
261299
@@ -285,22 +323,33 @@ def evaluate(messages, ground_truth=None, tools=None, **kwargs):
285323"""
286324 )
287325
-        # Call create_evaluation
-        result = create_evaluation(
-            evaluator_id="word-count-eval",
-            metric_folders=[f"word_count={os.path.join(tmp_dir, 'word_count')}"],
-            display_name="Word Count Evaluator",
-            description="Evaluates responses based on word count",
-            force=True,
-        )
+        # Create requirements.txt
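+        # (the new upload flow appears to expect one alongside the metric code)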
+        with open(os.path.join(tmp_dir, "requirements.txt"), "w") as f:
+            f.write("eval-protocol>=0.1.0\n")

-        # Verify results
-        assert result["name"] == "accounts/test_account/evaluators/word-count-eval"
-        assert result["displayName"] == "Word Count Evaluator"
-        assert result["description"] == "Evaluates responses based on word count"
+        # Change to temp directory
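+        # (create_evaluation is assumed to resolve requirements.txt from the CWD)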
+        original_cwd = os.getcwd()
+        os.chdir(tmp_dir)
+
+        try:
+            # Call create_evaluation
+            result = create_evaluation(
+                evaluator_id="word-count-eval",
+                metric_folders=[f"word_count={os.path.join(tmp_dir, 'word_count')}"],
+                display_name="Word Count Evaluator",
+                description="Evaluates responses based on word count",
+                force=True,
+            )

+            # Verify results
+            assert result["name"] == "accounts/test_account/evaluators/word-count-eval"
+            assert result["displayName"] == "Word Count Evaluator"
+            assert result["description"] == "Evaluates responses based on word count"
+        finally:
+            os.chdir(original_cwd)

-def test_preview_then_create(monkeypatch, mock_env_variables, mock_preview_api, mock_create_api):
+
+def test_preview_then_create(monkeypatch, mock_env_variables, mock_preview_api, mock_create_api, mock_gcs_upload):
304353 """Test the full example flow (simulated)"""
305354 # Patch input to always return 'y'
306355 monkeypatch .setattr ("builtins.input" , lambda _ : "y" )
@@ -330,6 +379,10 @@ def evaluate(messages, ground_truth=None, tools=None, **kwargs):
330379"""
331380 )
332381
+        # Create requirements.txt
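+        # (same assumption as in test_create_evaluation: the upload flow expects it)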
+        with open(os.path.join(tmp_dir, "requirements.txt"), "w") as f:
+            f.write("eval-protocol>=0.1.0\n")
+
         # Create a temporary sample file
         sample_fd, sample_path = tempfile.mkstemp(suffix=".jsonl")
         with os.fdopen(sample_fd, "w") as f:
@@ -365,46 +418,53 @@ def evaluate(messages, ground_truth=None, tools=None, **kwargs):
         # Create a patched example module with modified paths
         from eval_protocol.evaluation import create_evaluation, preview_evaluation

-        # Define a patched main function
-        def patched_main():
-            # Preview the evaluation using metrics folder and samples file
-            print("Previewing evaluation...")
-            preview_result = preview_evaluation(
-                metric_folders=[f"word_count={os.path.join(tmp_dir, 'word_count')}"],
-                sample_file=sample_path,
-                max_samples=2,
-            )
-
-            preview_result.display()
-
-            # Check if 'used_preview_api' attribute exists and is True
-            import eval_protocol.evaluation as evaluation_module
+        # Change to temp directory
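+        # (as above, so requirements.txt can be found in the working directory)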
+        original_cwd = os.getcwd()
+        os.chdir(tmp_dir)

-            # For testing, always assume the API was used successfully
-            evaluation_module.used_preview_api = True
-
-            print("\nCreating evaluation...")
-            try:
-                evaluator = create_evaluation(
-                    evaluator_id="word-count-eval",
+        try:
+            # Define a patched main function
+            def patched_main():
+                # Preview the evaluation using metrics folder and samples file
+                print("Previewing evaluation...")
+                preview_result = preview_evaluation(
                     metric_folders=[f"word_count={os.path.join(tmp_dir, 'word_count')}"],
-                    display_name="Word Count Evaluator",
-                    description="Evaluates responses based on word count",
-                    force=True,
+                    sample_file=sample_path,
+                    max_samples=2,
                 )
395- print (f"Created evaluator: { evaluator ['name' ]} " )
396- return evaluator
397- except Exception as e :
398- print (f"Error creating evaluator: { str (e )} " )
399- print ("Make sure you have proper Fireworks API credentials set up." )
400- return None
401-
402- # Run the patched main function
403- result = patched_main ()
404-
405- # Clean up
406- os .unlink (sample_path )

-        # Verify the result
-        assert result is not None
-        assert result["name"] == "accounts/test_account/evaluators/word-count-eval"
+                preview_result.display()
+
+                # Check if 'used_preview_api' attribute exists and is True
+                import eval_protocol.evaluation as evaluation_module
+
+                # For testing, always assume the API was used successfully
+                evaluation_module.used_preview_api = True
+
+                print("\nCreating evaluation...")
+                try:
+                    evaluator = create_evaluation(
+                        evaluator_id="word-count-eval",
+                        metric_folders=[f"word_count={os.path.join(tmp_dir, 'word_count')}"],
+                        display_name="Word Count Evaluator",
+                        description="Evaluates responses based on word count",
+                        force=True,
+                    )
+                    print(f"Created evaluator: {evaluator['name']}")
+                    return evaluator
+                except Exception as e:
+                    print(f"Error creating evaluator: {str(e)}")
+                    print("Make sure you have proper Fireworks API credentials set up.")
+                    return None
+
+            # Run the patched main function
+            result = patched_main()
+
+            # Clean up
+            os.unlink(sample_path)
+
+            # Verify the result
+            assert result is not None
+            assert result["name"] == "accounts/test_account/evaluators/word-count-eval"
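+        # Restore the original working directory even if an assertion fails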
+        finally:
+            os.chdir(original_cwd)