-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_evaluation_logic.py
More file actions
32 lines (26 loc) · 1.18 KB
/
test_evaluation_logic.py
File metadata and controls
32 lines (26 loc) · 1.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
#!/usr/bin/env python3
"""
Simple test to verify MMLU evaluation logic without loading complex models
"""
# Mock some data to test the evaluation logic
correct_answers = [1, 2, 3, 0]  # B, C, D, A
predicted_answers = [0, 0, 0, 0]  # Always predict A

_CHOICE_LETTERS = ("A", "B", "C", "D")
_FALLBACK_INDEX = 0  # index substituted for a label that cannot be interpreted


def _answer_to_index(ans):
    """Convert one ground-truth label to a choice index.

    Args:
        ans: Either a single choice letter ("A".."D", any case) or an
            integer index in the range 0..3.

    Returns:
        The matching index for a valid label, otherwise ``_FALLBACK_INDEX``.
    """
    if isinstance(ans, str):
        # Normalise the case *before* the membership test. Checking the raw
        # string against the upper-case letters made the later ``.upper()``
        # call pointless and sent lower-case labels to the fallback.
        # NOTE(review): the evaluation function this script mimics is not
        # visible here - confirm it normalises case the same way.
        letter = ans.upper()
        if letter in _CHOICE_LETTERS:
            return ord(letter) - ord("A")
    elif isinstance(ans, int) and 0 <= ans <= 3:
        return ans
    # Unrecognised labels are deliberately mapped to the fallback index
    # rather than raising, matching the original best-effort behaviour.
    return _FALLBACK_INDEX


def evaluate_predictions(correct, predicted):
    """Compare predicted choice indices against ground-truth labels.

    The two sequences are paired with ``zip``, so any surplus items in the
    longer one are ignored.

    Args:
        correct: Iterable of ground-truth labels (choice letters or indices).
        predicted: Iterable of predicted choice indices.

    Returns:
        A list with one ``(target, predicted, is_correct)`` tuple per pair,
        where ``target`` is the label converted to an index.
    """
    results = []
    for truth, guess in zip(correct, predicted):
        # Targets are built per batch; each question is a batch of 1 here.
        answer_targets = [_answer_to_index(ans) for ans in [truth]]
        target = answer_targets[0]
        results.append((target, guess, guess == target))
    return results


def main():
    """Print the mock data and the per-question comparison results."""
    print("Testing evaluation logic:")
    print("Correct answers (indices):", correct_answers)
    print("Predicted answers (indices):", predicted_answers)
    # Test the target creation logic from the evaluation function
    outcomes = evaluate_predictions(correct_answers, predicted_answers)
    for number, (target, guess, is_correct) in enumerate(outcomes, start=1):
        print(f"Q{number}: Correct={target}, Predicted={guess}, Is_Correct={is_correct}")


if __name__ == "__main__":
    main()