Skip to content

Commit 0ce8140

Browse files
committed
some updates
1 parent 4011709 commit 0ce8140

9 files changed

Lines changed: 5302 additions & 1456 deletions
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": []
9+
}
10+
],
11+
"metadata": {
12+
"language_info": {
13+
"name": "python"
14+
}
15+
},
16+
"nbformat": 4,
17+
"nbformat_minor": 2
18+
}

mlscorecheck/auc/_utils.py

Lines changed: 181 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,9 @@
4040
"integrate_roc_curve",
4141
"integrate_roc_curves",
4242
"sample1",
43-
"sample2"
43+
"sample2",
44+
"sample0_unconstrained",
45+
"sample1_unconstrained"
4446
]
4547

4648

@@ -989,28 +991,28 @@ def max_acc_estimator(auc, p, n):
989991
tprs = (1 - (1 - x)**(1/exp))**exp
990992
return np.max(((1 - x)*n + tprs*p)/(p + n))
991993

992-
def sample0_min_max(fpr1, tpr1, fpr2, tpr2, random_state):
    """Draw one uniform point per element inside the box spanned by two nodes.

    For every index ``i`` a point is sampled with
    ``fpr = fpr1[i] + u * (fpr2[i] - fpr1[i])`` and
    ``tpr = tpr1[i] + v * (tpr2[i] - tpr1[i])`` where ``u`` and ``v`` are
    independent draws from ``random_state.random_sample``. No further
    constraint is applied, so every draw is accepted.

    Args:
        fpr1 (np.ndarray): fpr coordinates of the first corner.
        tpr1 (np.ndarray): tpr coordinates of the first corner.
        fpr2 (np.ndarray): fpr coordinates of the second corner.
        tpr2 (np.ndarray): tpr coordinates of the second corner.
        random_state: object exposing ``random_sample(size)``, e.g. a
            ``np.random.RandomState``.

    Returns:
        tuple(np.ndarray, np.ndarray): the sampled fpr and tpr values, one
        per element of ``fpr1``.
    """
    n_points = len(fpr1)

    # The fpr draw must precede the tpr draw: the order in which the random
    # stream is consumed determines the reproducibility of seeded runs.
    fpr_result = (fpr2 - fpr1) * random_state.random_sample(n_points) + fpr1
    tpr_result = (tpr2 - tpr1) * random_state.random_sample(n_points) + tpr1

    return fpr_result, tpr_result
10031005

1004-
def sample0_rmin_max(fpr1, tpr1, fpr2, tpr2):
1006+
def sample0_rmin_max(fpr1, tpr1, fpr2, tpr2, random_state):
10051007
active = np.repeat(True, len(fpr1))
10061008
fpr_result = np.repeat(-1.0, len(fpr1))
10071009
tpr_result = np.repeat(-1.0, len(fpr1))
10081010
n_active = len(fpr1)
10091011

10101012
while n_active > 0:
10111013

1012-
fpr_result[active] = (fpr2[active] - fpr1[active]) * np.random.random_sample(n_active) + fpr1[active]
1013-
tpr_result[active] = (tpr2[active] - tpr1[active]) * np.random.random_sample(n_active) + tpr1[active]
1014+
fpr_result[active] = (fpr2[active] - fpr1[active]) * random_state.random_sample(n_active) + fpr1[active]
1015+
tpr_result[active] = (tpr2[active] - tpr1[active]) * random_state.random_sample(n_active) + tpr1[active]
10141016

10151017
lower_bounds = np.max(np.vstack([tpr1, fpr_result]).T, axis=1)
10161018

@@ -1020,16 +1022,36 @@ def sample0_rmin_max(fpr1, tpr1, fpr2, tpr2):
10201022

10211023
return fpr_result, tpr_result
10221024

1023-
def sample0_rmin_maxa(fpr1, tpr1, fpr2, tpr2, max_acc, p, n):
1025+
def sample0_unconstrained(fpr1, tpr1, fpr2, tpr2, random_state):
    """Rejection-sample one point per element with ``tpr >= fpr``.

    Each element gets a uniform draw inside the box spanned by
    ``(fpr1[i], tpr1[i])`` and ``(fpr2[i], tpr2[i])``. Elements whose sampled
    tpr is below the sampled fpr are redrawn until none remain.

    NOTE: the loop has no iteration cap; it keeps running for as long as some
    element has ``tpr < fpr`` after a redraw.

    Args:
        fpr1 (np.ndarray): fpr coordinates of the first corner.
        tpr1 (np.ndarray): tpr coordinates of the first corner.
        fpr2 (np.ndarray): fpr coordinates of the second corner.
        tpr2 (np.ndarray): tpr coordinates of the second corner.
        random_state: object exposing ``random_sample(size)``, e.g. a
            ``np.random.RandomState``.

    Returns:
        tuple(np.ndarray, np.ndarray): the accepted fpr and tpr values.
    """
    size = len(fpr1)
    pending = np.ones(size, dtype=bool)
    fpr_result = np.full(size, -1.0)
    tpr_result = np.full(size, -1.0)
    remaining = size

    while remaining > 0:
        # fpr is drawn before tpr so the random stream is consumed in a
        # fixed order
        u_fpr = random_state.random_sample(remaining)
        fpr_result[pending] = (fpr2[pending] - fpr1[pending]) * u_fpr + fpr1[pending]

        u_tpr = random_state.random_sample(remaining)
        tpr_result[pending] = (tpr2[pending] - tpr1[pending]) * u_tpr + tpr1[pending]

        # only elements that were still pending can stay pending
        pending &= tpr_result < fpr_result
        remaining = np.sum(pending)

    return fpr_result, tpr_result
1043+
1044+
def sample0_rmin_maxa(fpr1, tpr1, fpr2, tpr2, max_acc, p, n, random_state):
1045+
active = np.repeat(True, len(fpr1))
1046+
fpr_result = np.repeat(-1.0, len(fpr1))
1047+
tpr_result = np.repeat(-1.0, len(fpr1))
1048+
n_active = len(fpr1)
1049+
1050+
iteration = 0
1051+
while n_active > 0:
1052+
1053+
fpr_result[active] = (fpr2[active] - fpr1[active]) * random_state.random_sample(n_active) + fpr1[active]
1054+
tpr_result[active] = (tpr2[active] - tpr1[active]) * random_state.random_sample(n_active) + tpr1[active]
10331055
#tpr_result[active] = (tpr2[active] - tpr1[active]) * 0.5 + tpr1[active]
10341056

10351057
maxa_bounds = (max_acc * (p + n) - (1 - fpr_result) * n) / p
@@ -1041,44 +1063,143 @@ def sample0_rmin_maxa(fpr1, tpr1, fpr2, tpr2, max_acc, p, n):
10411063

10421064
n_active = np.sum(active)
10431065

1066+
iteration += 1
1067+
if iteration > 20:
1068+
lower_mask = tpr_result < lower_bounds
1069+
tpr_result[lower_mask] = lower_bounds[lower_mask]
1070+
1071+
upper_mask = tpr_result > upper_bounds
1072+
tpr_result[upper_mask] = upper_bounds[upper_mask]
1073+
break
1074+
10441075
return fpr_result, tpr_result
10451076

1046-
def sample1(fpr0, tpr0, n_samples, n_nodes, p=None, n=None, max_acc=None, mode='min-max'):
1077+
def sample0_min_maxa(fpr1, tpr1, fpr2, tpr2, max_acc, p, n, random_state):
    """Rejection-sample points whose tpr stays below a max-accuracy bound.

    Each element gets a uniform draw inside the box spanned by
    ``(fpr1[i], tpr1[i])`` and ``(fpr2[i], tpr2[i])``. A draw is rejected when
    its tpr exceeds ``min(tpr2, maxa)``, where ``maxa`` is the tpr at which
    ``((1 - fpr) * n + tpr * p) / (p + n)`` equals ``max_acc``. Rejected
    elements are redrawn; after 21 rounds any tpr still above its upper bound
    is set to that bound and sampling stops.

    Args:
        fpr1 (np.ndarray): fpr coordinates of the first corner.
        tpr1 (np.ndarray): tpr coordinates of the first corner.
        fpr2 (np.ndarray): fpr coordinates of the second corner.
        tpr2 (np.ndarray): tpr coordinates of the second corner.
        max_acc (float): accuracy value used in the upper bound.
        p (int): weight of tpr in the accuracy formula (presumably the
            number of positives - confirm with callers).
        n (int): weight of ``1 - fpr`` in the accuracy formula (presumably
            the number of negatives - confirm with callers).
        random_state: object exposing ``random_sample(size)``, e.g. a
            ``np.random.RandomState``.

    Returns:
        tuple(np.ndarray, np.ndarray): the sampled fpr and tpr values.
    """
    size = len(fpr1)
    pending = np.ones(size, dtype=bool)
    fpr_result = np.full(size, -1.0)
    tpr_result = np.full(size, -1.0)
    remaining = size

    rounds = 0
    while remaining > 0:
        # fpr is drawn before tpr so the random stream is consumed in a
        # fixed order
        u_fpr = random_state.random_sample(remaining)
        fpr_result[pending] = (fpr2[pending] - fpr1[pending]) * u_fpr + fpr1[pending]

        u_tpr = random_state.random_sample(remaining)
        tpr_result[pending] = (tpr2[pending] - tpr1[pending]) * u_tpr + tpr1[pending]

        maxa_bounds = (max_acc * (p + n) - (1 - fpr_result) * n) / p
        upper_bounds = np.minimum(tpr2, maxa_bounds)

        too_high = tpr_result > upper_bounds
        pending &= too_high
        remaining = np.sum(pending)

        rounds += 1
        if rounds >= 21:
            # give up on rejection sampling: pull the offenders onto the bound
            tpr_result = np.where(too_high, upper_bounds, tpr_result)
            break

    return fpr_result, tpr_result
1105+
1106+
def sample0_mina_maxa(fpr1, tpr1, fpr2, tpr2, max_acc, p, n, random_state):
    """Rejection-sample points whose tpr lies between two accuracy bounds.

    Each element gets a uniform draw inside the box spanned by
    ``(fpr1[i], tpr1[i])`` and ``(fpr2[i], tpr2[i])``. A draw is accepted when
    ``lower <= tpr <= upper`` with

    * ``lower = max(tpr1, mina)``, ``mina`` being the tpr at which
      ``((1 - fpr) * p + tpr * n) / (p + n)`` equals ``1 - max_acc``;
    * ``upper = min(tpr2, maxa)``, ``maxa`` being the tpr at which
      ``((1 - fpr) * n + tpr * p) / (p + n)`` equals ``max_acc``.

    Rejected elements are redrawn. After 21 rounds the remaining offenders
    are clamped into ``[lower, upper]`` and sampling stops. The lower bound
    is applied first and the upper bound second, so the upper bound wins if
    the two contradict each other.

    Args:
        fpr1 (np.ndarray): fpr coordinates of the first corner.
        tpr1 (np.ndarray): tpr coordinates of the first corner.
        fpr2 (np.ndarray): fpr coordinates of the second corner.
        tpr2 (np.ndarray): tpr coordinates of the second corner.
        max_acc (float): accuracy value used in both bounds.
        p (int): presumably the number of positives - confirm with callers.
        n (int): presumably the number of negatives - confirm with callers.
        random_state: object exposing ``random_sample(size)``, e.g. a
            ``np.random.RandomState``.

    Returns:
        tuple(np.ndarray, np.ndarray): the sampled fpr and tpr values.
    """
    n_points = len(fpr1)
    active = np.ones(n_points, dtype=bool)
    fpr_result = np.full(n_points, -1.0)
    tpr_result = np.full(n_points, -1.0)
    n_active = n_points

    iteration = 0
    while n_active > 0:
        # fpr is drawn before tpr so the random stream is consumed in a
        # fixed order
        fpr_draw = random_state.random_sample(n_active)
        fpr_result[active] = (fpr2[active] - fpr1[active]) * fpr_draw + fpr1[active]

        tpr_draw = random_state.random_sample(n_active)
        tpr_result[active] = (tpr2[active] - tpr1[active]) * tpr_draw + tpr1[active]

        mina_bounds = ((1 - max_acc) * (p + n) - (1 - fpr_result) * p) / n
        maxa_bounds = (max_acc * (p + n) - (1 - fpr_result) * n) / p

        lower_bounds = np.maximum(tpr1, mina_bounds)
        upper_bounds = np.minimum(tpr2, maxa_bounds)

        active &= (tpr_result > upper_bounds) | (tpr_result < lower_bounds)
        n_active = np.count_nonzero(active)

        iteration += 1
        if iteration > 20:
            # Give up on rejection sampling. Both sides must be clamped:
            # previously only the upper bound was enforced here, so values
            # below the lower bound leaked out of the function.
            lower_mask = tpr_result < lower_bounds
            tpr_result[lower_mask] = lower_bounds[lower_mask]

            upper_mask = tpr_result > upper_bounds
            tpr_result[upper_mask] = upper_bounds[upper_mask]
            break

    return fpr_result, tpr_result
1137+
1138+
def sample1(fpr0, tpr0, n_samples, n_nodes, p=None, n=None, max_acc=None, mode='min-max', random_state=None):
1139+
if not isinstance(random_state, np.random.RandomState):
1140+
random_state = np.random.RandomState(random_state)
10471141
fpr0s = np.repeat(fpr0, n_samples)
10481142
tpr0s = np.repeat(tpr0, n_samples)
10491143
zeros = np.repeat(0.0, n_samples)
10501144
ones = np.repeat(1.0, n_samples)
10511145

1052-
curves_fpr = np.zeros((n_samples, n_nodes))
1053-
curves_tpr = np.zeros((n_samples, n_nodes))
1146+
curves_fpr = np.zeros((n_samples, n_nodes), dtype=float)
1147+
curves_tpr = np.zeros((n_samples, n_nodes), dtype=float)
10541148

10551149
curves_fpr[:, 0] = zeros
10561150
curves_tpr[:, 0] = zeros
10571151
curves_fpr[:, 1] = ones
10581152
curves_tpr[:, 1] = ones
10591153

1060-
curves_fpr[:, 2] = fpr0s
1061-
curves_tpr[:, 2] = tpr0s
1154+
if fpr0 < 1.0 - fpr0:
1155+
curves_fpr[:, 2] = fpr0s
1156+
curves_tpr[:, 2] = tpr0s
1157+
1158+
curves_fpr[:, 3] = 1.0 - tpr0s
1159+
curves_tpr[:, 3] = 1.0 - fpr0s
1160+
else:
1161+
curves_fpr[:, 3] = fpr0s
1162+
curves_tpr[:, 3] = tpr0s
1163+
1164+
curves_fpr[:, 2] = 1.0 - tpr0s
1165+
curves_tpr[:, 2] = 1.0 - fpr0s
10621166

1063-
pool = [(0, 2), (2, 1)]
1167+
pool = [(0, 2), (3, 1), (2, 3)]
10641168

1065-
for idx in range(n_nodes - 3):
1169+
for idx in range(n_nodes - 4):
10661170
left, right = pool[0]
10671171
pool = pool[1:]
10681172
if mode == 'min-max':
1069-
fprs_new, tprs_new = sample0_min_max(curves_fpr[:, left], curves_tpr[:, left], curves_fpr[:, right], curves_tpr[:, right])
1173+
fprs_new, tprs_new = sample0_min_max(curves_fpr[:, left], curves_tpr[:, left], curves_fpr[:, right], curves_tpr[:, right], random_state)
10701174
elif mode == 'rmin-max':
1071-
fprs_new, tprs_new = sample0_rmin_max(curves_fpr[:, left], curves_tpr[:, left], curves_fpr[:, right], curves_tpr[:, right])
1175+
fprs_new, tprs_new = sample0_rmin_max(curves_fpr[:, left], curves_tpr[:, left], curves_fpr[:, right], curves_tpr[:, right], random_state)
10721176
elif mode == 'rmin-maxa':
1073-
fprs_new, tprs_new = sample0_rmin_maxa(curves_fpr[:, left], curves_tpr[:, left], curves_fpr[:, right], curves_tpr[:, right], max_acc, p, n)
1074-
curves_fpr[:, idx+3] = fprs_new
1075-
curves_tpr[:, idx+3] = tprs_new
1076-
pool = pool + [(left, idx+3), (idx+3, right)]
1177+
fprs_new, tprs_new = sample0_rmin_maxa(curves_fpr[:, left], curves_tpr[:, left], curves_fpr[:, right], curves_tpr[:, right], max_acc, p, n, random_state)
1178+
elif mode == 'min-maxa':
1179+
fprs_new, tprs_new = sample0_min_maxa(curves_fpr[:, left], curves_tpr[:, left], curves_fpr[:, right], curves_tpr[:, right], max_acc, p, n, random_state)
1180+
elif mode == 'mina-maxa':
1181+
fprs_new, tprs_new = sample0_mina_maxa(curves_fpr[:, left], curves_tpr[:, left], curves_fpr[:, right], curves_tpr[:, right], max_acc, p, n, random_state)
1182+
curves_fpr[:, idx+4] = fprs_new
1183+
curves_tpr[:, idx+4] = tprs_new
1184+
pool = pool + [(left, idx+4), (idx+4, right)]
10771185

10781186
sorting = np.argsort(curves_fpr, axis=1)
10791187
curves_fpr = curves_fpr[np.arange(n_samples)[:, None], sorting]
10801188
curves_tpr = curves_tpr[np.arange(n_samples)[:, None], sorting]
10811189

1190+
mask = (curves_fpr + curves_tpr <= 1)
1191+
max_idx = np.where(~(mask[0]))[0][0]
1192+
1193+
curves_fpr = curves_fpr[:, :max_idx]
1194+
curves_tpr = curves_tpr[:, :max_idx]
1195+
1196+
curves_fpr = np.hstack([curves_fpr, 1.0 - curves_tpr])
1197+
curves_tpr = np.hstack([curves_tpr, 1.0 - curves_fpr])
1198+
1199+
sorting = np.argsort(curves_fpr, axis=1)
1200+
curves_fpr = curves_fpr[np.arange(n_samples)[:, None], sorting]
1201+
curves_tpr = curves_tpr[np.arange(n_samples)[:, None], sorting]
1202+
10821203
if n is not None:
10831204
curves_fpr = np.round(curves_fpr * n) / n
10841205

@@ -1094,4 +1215,40 @@ def sample2(fpr0, tpr0, n_samples, n_nodes, p=None, n=None, max_acc=None, mode='
10941215
if not raw:
10951216
return np.mean(aucs)
10961217
else:
1097-
return aucs, n_nodes
1218+
return aucs, n_nodes
1219+
1220+
def sample1_unconstrained(n_samples, n_nodes, p=None, n=None, random_state=None):
    """Sample ``n_samples`` curves of ``n_nodes`` nodes between (0, 0) and (1, 1).

    Every curve starts with the nodes (0, 0) and (1, 1). The remaining nodes
    are generated one at a time by ``sample0_unconstrained``: segments are
    processed in first-in-first-out order, each processed segment receives one
    new node, and the two sub-segments it creates are queued. Finally the
    nodes of each curve are sorted by fpr and optionally rounded.

    Args:
        n_samples (int): number of curves to sample.
        n_nodes (int): number of nodes per curve, including the two fixed
            end nodes.
        p (int/None): if given, tpr values are rounded to multiples of
            ``1 / p``.
        n (int/None): if given, fpr values are rounded to multiples of
            ``1 / n``.
        random_state (None/int/np.random.RandomState): a ``RandomState`` is
            used as is; anything else is passed to ``np.random.RandomState``
            as the seed.

    Returns:
        tuple(np.ndarray, np.ndarray): fpr and tpr arrays of shape
        ``(n_samples, n_nodes)``.
    """
    if not isinstance(random_state, np.random.RandomState):
        random_state = np.random.RandomState(random_state)

    curves_fpr = np.zeros((n_samples, n_nodes), dtype=float)
    curves_tpr = np.zeros((n_samples, n_nodes), dtype=float)

    # fixed end nodes: column 0 is (0, 0), column 1 is (1, 1)
    curves_fpr[:, 0] = 0.0
    curves_tpr[:, 0] = 0.0
    curves_fpr[:, 1] = 1.0
    curves_tpr[:, 1] = 1.0

    # Append-only FIFO of (left column, right column) segments: exactly one
    # segment is consumed per new node, so the k-th new node always reads the
    # k-th entry ever queued.
    segments = [(0, 1)]

    for column in range(2, n_nodes):
        left, right = segments[column - 2]
        fprs_new, tprs_new = sample0_unconstrained(
            curves_fpr[:, left],
            curves_tpr[:, left],
            curves_fpr[:, right],
            curves_tpr[:, right],
            random_state,
        )
        curves_fpr[:, column] = fprs_new
        curves_tpr[:, column] = tprs_new
        segments.extend([(left, column), (column, right)])

    # order the nodes of every curve by increasing fpr
    order = np.argsort(curves_fpr, axis=1)
    curves_fpr = np.take_along_axis(curves_fpr, order, axis=1)
    curves_tpr = np.take_along_axis(curves_tpr, order, axis=1)

    if n is not None:
        curves_fpr = np.round(curves_fpr * n) / n

    if p is not None:
        curves_tpr = np.round(curves_tpr * p) / p

    return curves_fpr, curves_tpr

notebooks/auc_experiments/00-integrals.ipynb

Lines changed: 44 additions & 9 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)