4040 "integrate_roc_curve" ,
4141 "integrate_roc_curves" ,
4242 "sample1" ,
43- "sample2"
43+ "sample2" ,
44+ "sample0_unconstrained" ,
45+ "sample1_unconstrained"
4446]
4547
4648
@@ -989,28 +991,28 @@ def max_acc_estimator(auc, p, n):
989991 tprs = (1 - (1 - x )** (1 / exp ))** exp
990992 return np .max (((1 - x )* n + tprs * p )/ (p + n ))
991993
def sample0_min_max(fpr1, tpr1, fpr2, tpr2, random_state=None):
    """Draw one uniformly distributed point inside each (fpr, tpr) box.

    For every index ``i`` a point is sampled with its fpr coordinate between
    ``fpr1[i]`` and ``fpr2[i]`` and its tpr coordinate between ``tpr1[i]``
    and ``tpr2[i]``. No further constraint is applied, so no rejection loop
    is needed.

    Parameters
    ----------
    fpr1, tpr1 : array-like
        Coordinates of the first corner of each box.
    fpr2, tpr2 : array-like
        Coordinates of the opposite corner of each box.
    random_state : None, int or numpy.random.RandomState, optional
        Source of randomness. Anything that is not already a ``RandomState``
        is used as the seed of a new one (``None`` gives a freshly seeded
        generator).

    Returns
    -------
    fpr_result, tpr_result : numpy.ndarray
        The sampled coordinates, one entry per box.
    """
    if not isinstance(random_state, np.random.RandomState):
        random_state = np.random.RandomState(random_state)

    fpr1, tpr1, fpr2, tpr2 = (np.asarray(a) for a in (fpr1, tpr1, fpr2, tpr2))
    n_points = len(fpr1)

    # The fpr draw must come before the tpr draw so that a given seed keeps
    # producing exactly the same points as before.
    fpr_result = (fpr2 - fpr1) * random_state.random_sample(n_points) + fpr1
    tpr_result = (tpr2 - tpr1) * random_state.random_sample(n_points) + tpr1

    return fpr_result, tpr_result
10031005
1004- def sample0_rmin_max (fpr1 , tpr1 , fpr2 , tpr2 ):
1006+ def sample0_rmin_max (fpr1 , tpr1 , fpr2 , tpr2 , random_state ):
10051007 active = np .repeat (True , len (fpr1 ))
10061008 fpr_result = np .repeat (- 1.0 , len (fpr1 ))
10071009 tpr_result = np .repeat (- 1.0 , len (fpr1 ))
10081010 n_active = len (fpr1 )
10091011
10101012 while n_active > 0 :
10111013
1012- fpr_result [active ] = (fpr2 [active ] - fpr1 [active ]) * np . random .random_sample (n_active ) + fpr1 [active ]
1013- tpr_result [active ] = (tpr2 [active ] - tpr1 [active ]) * np . random .random_sample (n_active ) + tpr1 [active ]
1014+ fpr_result [active ] = (fpr2 [active ] - fpr1 [active ]) * random_state .random_sample (n_active ) + fpr1 [active ]
1015+ tpr_result [active ] = (tpr2 [active ] - tpr1 [active ]) * random_state .random_sample (n_active ) + tpr1 [active ]
10141016
10151017 lower_bounds = np .max (np .vstack ([tpr1 , fpr_result ]).T , axis = 1 )
10161018
@@ -1020,16 +1022,36 @@ def sample0_rmin_max(fpr1, tpr1, fpr2, tpr2):
10201022
10211023 return fpr_result , tpr_result
10221024
1023- def sample0_rmin_maxa (fpr1 , tpr1 , fpr2 , tpr2 , max_acc , p , n ):
def sample0_unconstrained(fpr1, tpr1, fpr2, tpr2, random_state=None):
    """Sample one point per (fpr, tpr) box, rejecting points with tpr < fpr.

    For every index ``i`` a point is drawn uniformly with fpr between
    ``fpr1[i]`` and ``fpr2[i]`` and tpr between ``tpr1[i]`` and ``tpr2[i]``.
    Points that fall below the diagonal (``tpr < fpr``) are redrawn until
    every entry satisfies ``tpr >= fpr``.

    Parameters
    ----------
    fpr1, tpr1 : array-like
        Coordinates of the first corner of each box.
    fpr2, tpr2 : array-like
        Coordinates of the opposite corner of each box.
    random_state : None, int or numpy.random.RandomState, optional
        Source of randomness. Anything that is not already a ``RandomState``
        is used as the seed of a new one.

    Returns
    -------
    fpr_result, tpr_result : numpy.ndarray
        The accepted coordinates, one entry per box.

    Raises
    ------
    ValueError
        If some box lies strictly below the diagonal, i.e. its largest tpr
        is smaller than its smallest fpr. No draw could ever be accepted
        for such a box and the rejection loop would never terminate.
    """
    if not isinstance(random_state, np.random.RandomState):
        random_state = np.random.RandomState(random_state)

    fpr1, tpr1, fpr2, tpr2 = (np.asarray(a) for a in (fpr1, tpr1, fpr2, tpr2))

    if np.any(np.maximum(tpr1, tpr2) < np.minimum(fpr1, fpr2)):
        raise ValueError(
            "cannot sample tpr >= fpr: at least one box lies entirely "
            "below the diagonal"
        )

    n_points = len(fpr1)
    fpr_result = np.full(n_points, -1.0)
    tpr_result = np.full(n_points, -1.0)

    # Indices of the entries that still need a (re)draw, in ascending order.
    pending = np.arange(n_points)

    while pending.size > 0:
        # fpr is drawn before tpr in every round; this keeps the random
        # stream, and therefore the result for a given seed, unchanged.
        fpr_result[pending] = (
            (fpr2[pending] - fpr1[pending])
            * random_state.random_sample(pending.size)
            + fpr1[pending]
        )
        tpr_result[pending] = (
            (tpr2[pending] - tpr1[pending])
            * random_state.random_sample(pending.size)
            + tpr1[pending]
        )

        # Only entries below the diagonal stay pending; accepted entries are
        # never touched again.
        pending = pending[tpr_result[pending] < fpr_result[pending]]

    return fpr_result, tpr_result
1043+
1044+ def sample0_rmin_maxa (fpr1 , tpr1 , fpr2 , tpr2 , max_acc , p , n , random_state ):
1045+ active = np .repeat (True , len (fpr1 ))
1046+ fpr_result = np .repeat (- 1.0 , len (fpr1 ))
1047+ tpr_result = np .repeat (- 1.0 , len (fpr1 ))
1048+ n_active = len (fpr1 )
1049+
1050+ iteration = 0
1051+ while n_active > 0 :
1052+
1053+ fpr_result [active ] = (fpr2 [active ] - fpr1 [active ]) * random_state .random_sample (n_active ) + fpr1 [active ]
1054+ tpr_result [active ] = (tpr2 [active ] - tpr1 [active ]) * random_state .random_sample (n_active ) + tpr1 [active ]
10331055 #tpr_result[active] = (tpr2[active] - tpr1[active]) * 0.5 + tpr1[active]
10341056
10351057 maxa_bounds = (max_acc * (p + n ) - (1 - fpr_result ) * n ) / p
@@ -1041,44 +1063,143 @@ def sample0_rmin_maxa(fpr1, tpr1, fpr2, tpr2, max_acc, p, n):
10411063
10421064 n_active = np .sum (active )
10431065
1066+ iteration += 1
1067+ if iteration > 20 :
1068+ lower_mask = tpr_result < lower_bounds
1069+ tpr_result [lower_mask ] = lower_bounds [lower_mask ]
1070+
1071+ upper_mask = tpr_result > upper_bounds
1072+ tpr_result [upper_mask ] = upper_bounds [upper_mask ]
1073+ break
1074+
10441075 return fpr_result , tpr_result
10451076
1046- def sample1 (fpr0 , tpr0 , n_samples , n_nodes , p = None , n = None , max_acc = None , mode = 'min-max' ):
def sample0_min_maxa(fpr1, tpr1, fpr2, tpr2, max_acc, p, n, random_state=None, max_iter=20):
    """Sample one point per box, keeping tpr under a maximum-accuracy bound.

    For every index ``i`` a point is drawn uniformly with fpr between
    ``fpr1[i]`` and ``fpr2[i]`` and tpr between ``tpr1[i]`` and ``tpr2[i]``.
    A draw is rejected and repeated when its tpr exceeds
    ``min(tpr2, (max_acc * (p + n) - (1 - fpr) * n) / p)``, i.e. the tpr at
    which ``(tpr * p + (1 - fpr) * n) / (p + n)`` equals ``max_acc``.

    Parameters
    ----------
    fpr1, tpr1 : array-like
        Coordinates of the first corner of each box.
    fpr2, tpr2 : array-like
        Coordinates of the opposite corner of each box.
    max_acc : float
        Accuracy value used in the upper bound.
    p, n : number
        Weights used in the accuracy bound (presumably the numbers of
        positive and negative samples; confirm against the callers).
    random_state : None, int or numpy.random.RandomState, optional
        Source of randomness. Anything that is not already a ``RandomState``
        is used as the seed of a new one.
    max_iter : int, optional
        Rejection sampling stops once the number of completed rounds
        exceeds this value (so at most ``max_iter + 1`` rounds run).
        Entries still violating the bound at that point have their tpr
        clamped to the bound.

    Returns
    -------
    fpr_result, tpr_result : numpy.ndarray
        The sampled coordinates, one entry per box.
    """
    if not isinstance(random_state, np.random.RandomState):
        random_state = np.random.RandomState(random_state)

    fpr1, tpr1, fpr2, tpr2 = (np.asarray(a) for a in (fpr1, tpr1, fpr2, tpr2))

    n_points = len(fpr1)
    fpr_result = np.full(n_points, -1.0)
    tpr_result = np.full(n_points, -1.0)

    # Indices of the entries that still need a (re)draw, in ascending order.
    pending = np.arange(n_points)
    iteration = 0

    while pending.size > 0:
        # fpr is drawn before tpr in every round so that the random stream
        # for a given seed stays the same.
        fpr_result[pending] = (
            (fpr2[pending] - fpr1[pending])
            * random_state.random_sample(pending.size)
            + fpr1[pending]
        )
        tpr_result[pending] = (
            (tpr2[pending] - tpr1[pending])
            * random_state.random_sample(pending.size)
            + tpr1[pending]
        )

        # The accuracy bound depends on the freshly drawn fpr, so it has to
        # be recomputed every round for the entries that were redrawn.
        maxa_bounds = (max_acc * (p + n) - (1 - fpr_result[pending]) * n) / p
        upper_bounds = np.minimum(tpr2[pending], maxa_bounds)

        rejected = tpr_result[pending] > upper_bounds

        iteration += 1
        if iteration > max_iter:
            # Give up on rejection sampling: pull the remaining offenders
            # down onto their upper bound instead of looping further.
            tpr_result[pending[rejected]] = upper_bounds[rejected]
            break

        pending = pending[rejected]

    return fpr_result, tpr_result
1105+
def sample0_mina_maxa(fpr1, tpr1, fpr2, tpr2, max_acc, p, n, random_state=None, max_iter=20):
    """Sample one point per box with accuracy-based lower and upper tpr bounds.

    For every index ``i`` a point is drawn uniformly with fpr between
    ``fpr1[i]`` and ``fpr2[i]`` and tpr between ``tpr1[i]`` and ``tpr2[i]``.
    A draw is rejected and repeated when its tpr lies outside

    * lower bound: ``max(tpr1, ((1 - max_acc) * (p + n) - (1 - fpr) * p) / n)``
    * upper bound: ``min(tpr2, (max_acc * (p + n) - (1 - fpr) * n) / p)``

    Parameters
    ----------
    fpr1, tpr1 : array-like
        Coordinates of the first corner of each box.
    fpr2, tpr2 : array-like
        Coordinates of the opposite corner of each box.
    max_acc : float
        Accuracy value used in both bounds.
    p, n : number
        Weights used in the bounds (presumably the numbers of positive and
        negative samples; confirm against the callers).
    random_state : None, int or numpy.random.RandomState, optional
        Source of randomness. Anything that is not already a ``RandomState``
        is used as the seed of a new one.
    max_iter : int, optional
        Rejection sampling stops once the number of completed rounds
        exceeds this value (so at most ``max_iter + 1`` rounds run).
        Entries still out of bounds at that point are clamped: first up to
        the lower bound, then down to the upper bound, so the upper bound
        wins if the two are contradictory.

    Returns
    -------
    fpr_result, tpr_result : numpy.ndarray
        The sampled coordinates, one entry per box.
    """
    if not isinstance(random_state, np.random.RandomState):
        random_state = np.random.RandomState(random_state)

    fpr1, tpr1, fpr2, tpr2 = (np.asarray(a) for a in (fpr1, tpr1, fpr2, tpr2))

    n_points = len(fpr1)
    fpr_result = np.full(n_points, -1.0)
    tpr_result = np.full(n_points, -1.0)

    # Indices of the entries that still need a (re)draw, in ascending order.
    pending = np.arange(n_points)
    iteration = 0

    while pending.size > 0:
        # fpr is drawn before tpr in every round so that the random stream
        # for a given seed stays the same.
        fpr_result[pending] = (
            (fpr2[pending] - fpr1[pending])
            * random_state.random_sample(pending.size)
            + fpr1[pending]
        )
        tpr_result[pending] = (
            (tpr2[pending] - tpr1[pending])
            * random_state.random_sample(pending.size)
            + tpr1[pending]
        )

        fpr_now = fpr_result[pending]
        tpr_now = tpr_result[pending]

        # NOTE(review): p and n appear in swapped roles in the lower bound
        # compared with the upper bound; kept exactly as written - confirm
        # this is the intended formula.
        mina_bounds = ((1 - max_acc) * (p + n) - (1 - fpr_now) * p) / n
        maxa_bounds = (max_acc * (p + n) - (1 - fpr_now) * n) / p

        lower_bounds = np.maximum(tpr1[pending], mina_bounds)
        upper_bounds = np.minimum(tpr2[pending], maxa_bounds)

        rejected = (tpr_now > upper_bounds) | (tpr_now < lower_bounds)

        iteration += 1
        if iteration > max_iter:
            # Give up on rejection sampling and clamp. Both bounds are
            # enforced here: previously only the upper bound was, which let
            # samples below the lower bound escape unchanged.
            below = tpr_now < lower_bounds
            tpr_now[below] = lower_bounds[below]
            above = tpr_now > upper_bounds
            tpr_now[above] = upper_bounds[above]
            tpr_result[pending] = tpr_now
            break

        pending = pending[rejected]

    return fpr_result, tpr_result
1137+
1138+ def sample1 (fpr0 , tpr0 , n_samples , n_nodes , p = None , n = None , max_acc = None , mode = 'min-max' , random_state = None ):
1139+ if not isinstance (random_state , np .random .RandomState ):
1140+ random_state = np .random .RandomState (random_state )
10471141 fpr0s = np .repeat (fpr0 , n_samples )
10481142 tpr0s = np .repeat (tpr0 , n_samples )
10491143 zeros = np .repeat (0.0 , n_samples )
10501144 ones = np .repeat (1.0 , n_samples )
10511145
1052- curves_fpr = np .zeros ((n_samples , n_nodes ))
1053- curves_tpr = np .zeros ((n_samples , n_nodes ))
1146+ curves_fpr = np .zeros ((n_samples , n_nodes ), dtype = float )
1147+ curves_tpr = np .zeros ((n_samples , n_nodes ), dtype = float )
10541148
10551149 curves_fpr [:, 0 ] = zeros
10561150 curves_tpr [:, 0 ] = zeros
10571151 curves_fpr [:, 1 ] = ones
10581152 curves_tpr [:, 1 ] = ones
10591153
1060- curves_fpr [:, 2 ] = fpr0s
1061- curves_tpr [:, 2 ] = tpr0s
1154+ if fpr0 < 1.0 - fpr0 :
1155+ curves_fpr [:, 2 ] = fpr0s
1156+ curves_tpr [:, 2 ] = tpr0s
1157+
1158+ curves_fpr [:, 3 ] = 1.0 - tpr0s
1159+ curves_tpr [:, 3 ] = 1.0 - fpr0s
1160+ else :
1161+ curves_fpr [:, 3 ] = fpr0s
1162+ curves_tpr [:, 3 ] = tpr0s
1163+
1164+ curves_fpr [:, 2 ] = 1.0 - tpr0s
1165+ curves_tpr [:, 2 ] = 1.0 - fpr0s
10621166
1063- pool = [(0 , 2 ), (2 , 1 )]
1167+ pool = [(0 , 2 ), (3 , 1 ), ( 2 , 3 )]
10641168
1065- for idx in range (n_nodes - 3 ):
1169+ for idx in range (n_nodes - 4 ):
10661170 left , right = pool [0 ]
10671171 pool = pool [1 :]
10681172 if mode == 'min-max' :
1069- fprs_new , tprs_new = sample0_min_max (curves_fpr [:, left ], curves_tpr [:, left ], curves_fpr [:, right ], curves_tpr [:, right ])
1173+ fprs_new , tprs_new = sample0_min_max (curves_fpr [:, left ], curves_tpr [:, left ], curves_fpr [:, right ], curves_tpr [:, right ], random_state )
10701174 elif mode == 'rmin-max' :
1071- fprs_new , tprs_new = sample0_rmin_max (curves_fpr [:, left ], curves_tpr [:, left ], curves_fpr [:, right ], curves_tpr [:, right ])
1175+ fprs_new , tprs_new = sample0_rmin_max (curves_fpr [:, left ], curves_tpr [:, left ], curves_fpr [:, right ], curves_tpr [:, right ], random_state )
10721176 elif mode == 'rmin-maxa' :
1073- fprs_new , tprs_new = sample0_rmin_maxa (curves_fpr [:, left ], curves_tpr [:, left ], curves_fpr [:, right ], curves_tpr [:, right ], max_acc , p , n )
1074- curves_fpr [:, idx + 3 ] = fprs_new
1075- curves_tpr [:, idx + 3 ] = tprs_new
1076- pool = pool + [(left , idx + 3 ), (idx + 3 , right )]
1177+ fprs_new , tprs_new = sample0_rmin_maxa (curves_fpr [:, left ], curves_tpr [:, left ], curves_fpr [:, right ], curves_tpr [:, right ], max_acc , p , n , random_state )
1178+ elif mode == 'min-maxa' :
1179+ fprs_new , tprs_new = sample0_min_maxa (curves_fpr [:, left ], curves_tpr [:, left ], curves_fpr [:, right ], curves_tpr [:, right ], max_acc , p , n , random_state )
1180+ elif mode == 'mina-maxa' :
1181+ fprs_new , tprs_new = sample0_mina_maxa (curves_fpr [:, left ], curves_tpr [:, left ], curves_fpr [:, right ], curves_tpr [:, right ], max_acc , p , n , random_state )
1182+ curves_fpr [:, idx + 4 ] = fprs_new
1183+ curves_tpr [:, idx + 4 ] = tprs_new
1184+ pool = pool + [(left , idx + 4 ), (idx + 4 , right )]
10771185
10781186 sorting = np .argsort (curves_fpr , axis = 1 )
10791187 curves_fpr = curves_fpr [np .arange (n_samples )[:, None ], sorting ]
10801188 curves_tpr = curves_tpr [np .arange (n_samples )[:, None ], sorting ]
10811189
1190+ mask = (curves_fpr + curves_tpr <= 1 )
1191+ max_idx = np .where (~ (mask [0 ]))[0 ][0 ]
1192+
1193+ curves_fpr = curves_fpr [:, :max_idx ]
1194+ curves_tpr = curves_tpr [:, :max_idx ]
1195+
1196+ curves_fpr = np .hstack ([curves_fpr , 1.0 - curves_tpr ])
1197+ curves_tpr = np .hstack ([curves_tpr , 1.0 - curves_fpr ])
1198+
1199+ sorting = np .argsort (curves_fpr , axis = 1 )
1200+ curves_fpr = curves_fpr [np .arange (n_samples )[:, None ], sorting ]
1201+ curves_tpr = curves_tpr [np .arange (n_samples )[:, None ], sorting ]
1202+
10821203 if n is not None :
10831204 curves_fpr = np .round (curves_fpr * n ) / n
10841205
@@ -1094,4 +1215,40 @@ def sample2(fpr0, tpr0, n_samples, n_nodes, p=None, n=None, max_acc=None, mode='
10941215 if not raw :
10951216 return np .mean (aucs )
10961217 else :
1097- return aucs , n_nodes
1218+ return aucs , n_nodes
1219+
def sample1_unconstrained(n_samples, n_nodes, p=None, n=None, random_state=None):
    """Generate random curves from (0, 0) to (1, 1) by recursive subdivision.

    Each of the ``n_samples`` curves starts with the two end points (0, 0)
    and (1, 1). Segments are then processed in first-in-first-out order: a
    new node is sampled between the two ends of the oldest segment with
    ``sample0_unconstrained`` and the segment is replaced by its two halves,
    until every curve has ``n_nodes`` nodes. Finally the nodes of each curve
    are ordered by fpr.

    Parameters
    ----------
    n_samples : int
        Number of curves to generate.
    n_nodes : int
        Number of nodes per curve, end points included.
    p : number, optional
        If given, tpr values are rounded to multiples of ``1 / p``.
    n : number, optional
        If given, fpr values are rounded to multiples of ``1 / n``.
    random_state : None, int or numpy.random.RandomState, optional
        Source of randomness. Anything that is not already a ``RandomState``
        is used as the seed of a new one.

    Returns
    -------
    curves_fpr, curves_tpr : numpy.ndarray
        Arrays of shape ``(n_samples, n_nodes)``, one curve per row.
    """
    rng = random_state
    if not isinstance(rng, np.random.RandomState):
        rng = np.random.RandomState(rng)

    curves_fpr = np.zeros((n_samples, n_nodes), dtype=float)
    curves_tpr = np.zeros((n_samples, n_nodes), dtype=float)

    # Column 0 is the (0, 0) end point, column 1 the (1, 1) end point.
    curves_fpr[:, 0] = 0.0
    curves_tpr[:, 0] = 0.0
    curves_fpr[:, 1] = 1.0
    curves_tpr[:, 1] = 1.0

    # FIFO queue of (left column, right column) pairs still to be split.
    segments = [(0, 1)]

    for column in range(2, n_nodes):
        left, right = segments.pop(0)
        new_fpr, new_tpr = sample0_unconstrained(
            curves_fpr[:, left],
            curves_tpr[:, left],
            curves_fpr[:, right],
            curves_tpr[:, right],
            rng,
        )
        curves_fpr[:, column] = new_fpr
        curves_tpr[:, column] = new_tpr
        segments.extend([(left, column), (column, right)])

    # Reorder the nodes of every curve by ascending fpr; the tpr values are
    # permuted with the same per-row order so the points stay paired.
    order = np.argsort(curves_fpr, axis=1)
    rows = np.arange(n_samples)[:, None]
    curves_fpr = curves_fpr[rows, order]
    curves_tpr = curves_tpr[rows, order]

    if n is not None:
        curves_fpr = np.round(curves_fpr * n) / n

    if p is not None:
        curves_tpr = np.round(curves_tpr * p) / p

    return curves_fpr, curves_tpr
0 commit comments