-
Notifications
You must be signed in to change notification settings - Fork 13
Expand file tree
/
Copy pathcalc_accuracy.py
More file actions
122 lines (106 loc) · 3.91 KB
/
calc_accuracy.py
File metadata and controls
122 lines (106 loc) · 3.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
def suff_stats(h, m, epsilon):
    """
    Count the five sufficient statistics for pairwise preference agreement.

    +------------+-----------+-----------+-----------+
    | Notation   |         Model Prediction          |
    |            |     <     |     =     |     >     |
    +------------+-----------+-----------+-----------+
    | Human    < |     C     |    Tm     |     D     |
    | Label    = |     Th    |    Thm    |     Th    |
    |          > |     D     |    Tm     |     C     |
    +------------+-----------+-----------+-----------+
    C: consistent on the preference,
    D: discordant on the preference,
    Th: human ties but model doesn't,
    Tm: model ties but human doesn't,
    Thm: both human and model tie,
    epsilon: threshold below which |prediction| counts as a model tie.
    """
    counts = {'C': 0, 'D': 0, 'Th': 0, 'Tm': 0, 'Thm': 0}
    for human, pred in zip(h, m):
        human_tie = human == 0
        model_tie = abs(pred) <= epsilon
        if human_tie and model_tie:
            bucket = 'Thm'
        elif human_tie:
            bucket = 'Th'
        elif model_tie:
            bucket = 'Tm'
        elif human * pred > 0:
            bucket = 'C'
        else:
            bucket = 'D'
        counts[bucket] += 1
    return counts['C'], counts['D'], counts['Th'], counts['Tm'], counts['Thm']
def calc_acc(C, D, Th, Tm, Thm):
    """Accuracy from the sufficient statistics: correct pairs (C + Thm) over all pairs."""
    total = C + D + Th + Tm + Thm
    correct = C + Thm
    return correct / total
def calc_accuracy_with_ties(h, m):
    """
    Tie-calibrated pairwise accuracy (algorithm: https://arxiv.org/abs/2305.14324).

    Sweeps the tie threshold epsilon over every distinct |m_i| — plus the
    no-tie baseline with epsilon below the smallest |m_i| — and returns the
    best achievable accuracy. A pair is correct when the human and the
    (thresholded) model agree on the preferred side, or when both are ties.

    Complexity: O(N log N) — one sort, then a single pass over the pairs.

    Fixes vs. the one-pair-at-a-time version:
      * all pairs sharing the same |m_i| flip to "model tie" together before
        the accuracy is evaluated, so only states that correspond to a real
        threshold are considered;
      * the zero-tie baseline is also evaluated (a perfect tie-free
        predictor now scores 1.0, not (N-1)/N);
      * empty input returns 0 instead of float('-inf').

    Input:
        h: list of N human labels, 1 for prefer A, -1 for prefer B, 0 for ties
        m: list of N model predictions, can be obtained by Score(A) - Score(B)
    Output:
        acc_star: accuracy-with-ties (0 for empty input)
    """
    n = len(h)
    if n == 0:
        return 0
    # Baseline: epsilon below every |m_i|, i.e. the model never ties.
    # Correct pairs are exactly the concordant ones (human ties count as wrong).
    correct = sum(1 for hi, mi in zip(h, m) if hi != 0 and hi * mi > 0)
    acc_star = correct / n
    # Sweep candidate thresholds in increasing |m_i| order. The denominator
    # is always n, so only the "correct" count needs incremental updates.
    pairs = sorted(zip(h, m), key=lambda p: abs(p[1]))
    i = 0
    while i < n:
        eps = abs(pairs[i][1])
        # Flip every pair with this exact |m_i| to a model tie before scoring,
        # otherwise duplicate |m_i| values produce states no threshold yields.
        while i < n and abs(pairs[i][1]) == eps:
            hi, mi = pairs[i]
            if hi == 0:
                correct += 1  # Th -> Thm: human tie now matched by a model tie
            elif hi * mi > 0:
                correct -= 1  # C -> Tm: a correct preference becomes a wrong tie
            # else D -> Tm: discordant pair stays incorrect, count unchanged
            i += 1
        acc_star = max(acc_star, correct / n)
    return acc_star
def calc_accuracy_without_ties(h, m):
    """
    Plain pairwise accuracy, ignoring ties entirely.

    Human-tie pairs (h_i == 0) are excluded from the denominator, and no
    model prediction is treated as a tie (equivalent to suff_stats with
    epsilon = -1, under which Tm is always 0, so acc = C / (C + D)).

    Input:
        h: list of N human labels, 1 for prefer A, -1 for prefer B, 0 for ties
        m: list of N model predictions, can be obtained by Score(A) - Score(B)
    Output:
        acc: accuracy-without-ties (0 when no pair has a non-tied human label,
             instead of raising ZeroDivisionError)
    """
    concordant = 0
    considered = 0
    for hi, mi in zip(h, m):
        if hi == 0:
            continue  # human ties are excluded from this metric
        considered += 1
        if hi * mi > 0:
            concordant += 1
    return concordant / considered if considered else 0
if __name__ == "__main__":
    # Small smoke test: three A-preferred, three B-preferred, three tied labels.
    human_labels = [1, -1, 0, 1, -1, 0, 1, -1, 0]
    scores_A = [0.9, -0.7, 0.1, 0.8, -0.6, 0.2, 0.7, -0.5, 0.3]
    scores_B = [0.1, -0.8, 0.5, 0.5, -0.3, 0.3, 0.4, -0.4, 0.4]
    # Model preference margin: positive favors A, negative favors B.
    margins = [sa - sb for sa, sb in zip(scores_A, scores_B)]
    print("Accuracy with ties:", calc_accuracy_with_ties(human_labels, margins))
    print("Accuracy without ties:", calc_accuracy_without_ties(human_labels, margins))