diff --git a/MetricsReloaded/metrics/pairwise_measures.py b/MetricsReloaded/metrics/pairwise_measures.py index 94c5620..d08a891 100755 --- a/MetricsReloaded/metrics/pairwise_measures.py +++ b/MetricsReloaded/metrics/pairwise_measures.py @@ -286,15 +286,29 @@ def __init__( self.flag_empty = empty self.flag_empty_pred = False self.flag_empty_ref = False - if np.sum(self.pred) == 0: + if int(np.sum(self.pred)) == 0: self.flag_empty_pred = True - if np.sum(self.ref) == 0: + if int(np.sum(self.ref)) == 0: self.flag_empty_ref = True self.measures = measures if measures is not None else self.measures_dict self.connectivity = connectivity_type self.pixdim = pixdim + self.worse_dist = self.calculate_worse_dist() self.dict_args = dict_args + def calculate_worse_dist(self): + shape = self.ref.shape + pixdim = self.pixdim + if pixdim is not None: + mult_sp = shape * np.asarray(pixdim) + else: + mult_sp = shape + print(mult_sp) + max_dist = np.sqrt(np.sum(np.square(mult_sp))) + print(max_dist) + return max_dist + + def __fp_map(self): """ This function calculates the false positive map @@ -479,6 +493,12 @@ def youden_index(self): :return: youden_index """ + if self.n_pos_ref() == 0: + warnings.warn("Reference is empty - sensitivity is not defined") + return np.nan + if self.n_neg_ref() == 0: + warnings.warn("Reference is fully positive, specificity is not defined") + return np.nan youden_index = self.specificity() + self.sensitivity() - 1 return youden_index @@ -533,6 +553,12 @@ def balanced_accuracy(self): :return: balanced accuracy """ + if self.n_neg_ref() == 0: + warnings.warn('Reference All positive - specificity not defined') + return np.nan + if self.n_pos_ref() == 0: + warnings.warn("Reference All negative - sensitivity not defined") + return np.nan balanced_accuracy = 0.5 * self.sensitivity() + 0.5 * self.specificity() return balanced_accuracy @@ -564,6 +590,9 @@ def false_positive_rate(self): :return: false_positive_rate """ + if self.n_neg_ref() == 0: + 
warnings.warn("All positive in reference - FPR not defined") + return np.nan false_positive_rate = self.fp() / self.n_neg_ref() return false_positive_rate @@ -578,6 +607,12 @@ def normalised_expected_cost(self): prior_background = (self.tn() + self.fp()) / (np.size(self.ref)) prior_foreground = (self.tp() + self.fn()) / np.size(self.ref) + if self.n_pos_ref() == 0: + warnings.warn("Reference empty - r_fn not defined") + return np.nan + if self.n_neg_ref() == 0: + warnings.warn("Reference all positive - r_fp not defined") + return np.nan if "cost_fn" in self.dict_args.keys(): c_fn = self.dict_args["cost_fn"] @@ -681,7 +716,7 @@ def positive_likelihood_ratio(self): warnings.warn("reference empty - sensitivity not defined") return np.nan if self.specificity() == 1: - warnings.warn("Perfect specifiicty - likelihood ratio not defined") + warnings.warn("Perfect specificity - likelihood ratio not defined") return np.nan positive_likelihood_ratio = numerator / denominator return positive_likelihood_ratio @@ -718,8 +753,8 @@ def positive_predictive_value(self): warnings.warn("ref and prediction empty ppv not defined") return np.nan else: - warnings.warn("prediction empty, ppv not defined but set to 0") - return 0 + warnings.warn("prediction empty, ppv not defined") + return np.nan positive_predictive_value = self.tp() / (self.tp() + self.fp()) return positive_predictive_value @@ -734,7 +769,7 @@ def recall(self): return np.nan if self.n_pos_pred() == 0: warnings.warn( - "prediction is empty but ref not, recall not defined but set to 0" + "prediction is empty but ref not, recall set to 0" ) return 0 recall = self.tp() / (self.tp() + self.fn()) @@ -760,8 +795,8 @@ def dsc(self): numerator = 2 * self.tp() denominator = self.n_pos_pred() + self.n_pos_ref() if denominator == 0: - warnings.warn("Both Prediction and Reference are empty - set to 1 as correct solution even if not defined") - return 1 + warnings.warn("Both Prediction and Reference are empty - not defined - can be set 
to 1 when aggregating") + return np.nan else: dsc = numerator / denominator return dsc @@ -796,15 +831,15 @@ def fbeta(self): np.square(beta) * self.positive_predictive_value() + self.recall() ) if np.isnan(denominator): - if self.fp() + self.fn() > 0: + if self.fp() + self.fn() > 0: # Would occur if reference empty and prediction not return 0 else: - return 1 # Potentially modify to nan + return np.nan # Potentially modify to nan elif denominator == 0: if self.fp() + self.fn() > 0: return 0 else: - return 1 # Potentially modify to nan + return np.nan # Potentially modify to nan else: fbeta = numerator / denominator return fbeta @@ -857,9 +892,9 @@ def negative_predictive_value(self): return np.nan # Potentially modify to 1 else: warnings.warn( - "Nothing negative in pred but should be NPV not defined but set to 0" + "Nothing negative in pred but should be NPV not defined set to nan - possibly set to 0 in aggregation" ) - return 0 + return np.nan negative_predictive_value = self.tn() / (self.fn() + self.tn()) return negative_predictive_value @@ -1071,18 +1106,19 @@ def centreline_dsc(self): :return: cDSC """ - if self.n_pos_pred == 0 and self.n_pos_ref == 0: - warnings.warn("Both reference and prediction are empty - setting to max") - return 1 - top_prec = self.topology_precision() - top_sens = self.topology_sensitivity() - numerator = 2 * top_sens * top_prec - denominator = top_sens + top_prec - if np.isnan(top_sens) or np.isnan(top_sens): - warnings.warn("Topology sensitivity or precision not defined") + if int(self.n_pos_pred()) == 0 and int(self.n_pos_ref()) == 0: + warnings.warn("Both reference and prediction are empty - setting to nan - should be changed to max in aggregation") return np.nan - cDSC = numerator / denominator - return cDSC + else: + top_prec = self.topology_precision() + top_sens = self.topology_sensitivity() + numerator = 2 * top_sens * top_prec + denominator = top_sens + top_prec + if np.isnan(top_sens) or np.isnan(top_prec): + 
warnings.warn("Topology sensitivity or precision not defined") + return np.nan + cDSC = numerator / denominator + return cDSC def boundary_iou(self): """ @@ -1105,8 +1141,8 @@ def boundary_iou(self): else: distance = 1 if int(self.n_pos_ref()) == 0 and int(self.n_pos_pred()) == 0: - warnings.warn("Both prediction and reference empty - setting to max for boudnary ioU") - return 1 + warnings.warn("Both prediction and reference empty - return nan but setting to max for boundary ioU in aggregation") + return np.nan else: border_ref = MorphologyOps(self.ref, self.connectivity).border_map() distance_border_ref = ndimage.distance_transform_edt(1 - border_ref) @@ -1185,8 +1221,8 @@ def normalised_surface_distance(self): warnings.warn('No value set up for NSD tolerance - default to 1') tau = 1 if int(self.n_pos_pred()) == 0 and int(self.n_pos_ref()) == 0 : - warnings.warn("Both reference and prediction are empty - setting to best") - return 1 + warnings.warn("Both reference and prediction are empty - setting to best in aggregation but returning nan here") + return np.nan else: dist_ref, dist_pred, border_ref, border_pred = self.border_distance() reg_ref = np.where( @@ -1201,7 +1237,8 @@ def normalised_surface_distance(self): denominator = np.sum(border_ref) + np.sum(border_pred) # print(numerator, denominator, tau) return numerator / denominator - + + @CacheFunctionOutput def measured_distance(self): """ This functions calculates the average symmetric distance and the @@ -1218,8 +1255,8 @@ def measured_distance(self): warnings.warn('Percentile not specified in options for Hausdorff distance - default set to 95') perc = 95 if np.sum(self.pred + self.ref) == 0: - warnings.warn("Prediction and reference empty - distances set to 0") - return 0, 0, 0, 0 + warnings.warn("Prediction and reference empty - not defined - need to set to 0 in aggregation ") + return np.nan, np.nan, np.nan, np.nan if np.sum(self.pred) == 0 and np.sum(self.ref)>0: warnings.warn("Prediction empty but 
reference not empty - need to set to worse case in aggregation") return np.nan, np.nan, np.nan, np.nan diff --git a/MetricsReloaded/metrics/prob_pairwise_measures.py b/MetricsReloaded/metrics/prob_pairwise_measures.py index 48caec4..aa5a88f 100644 --- a/MetricsReloaded/metrics/prob_pairwise_measures.py +++ b/MetricsReloaded/metrics/prob_pairwise_measures.py @@ -74,6 +74,8 @@ def __init__( self.ref = ref_proba self.case = case self.flag_empty = empty + self.flag_ref_empty = True if int(self.n_pos_ref()) == 0 else False + self.flag_pred_empty = True if int(self.n_pos_pred()) == 0 else False self.dict_args = dict_args self.measures = measures if measures is not None else self.measures_dict @@ -96,6 +98,10 @@ def tn_thr(self, thresh): @CacheFunctionOutput def n_pos_ref(self): return np.sum(self.ref) + + @CacheFunctionOutput + def n_pos_pred(self): + return np.sum(self.pred) @CacheFunctionOutput def n_neg_ref(self): diff --git a/MetricsReloaded/processes/mixed_measures_processes.py b/MetricsReloaded/processes/mixed_measures_processes.py index 228124d..755295b 100644 --- a/MetricsReloaded/processes/mixed_measures_processes.py +++ b/MetricsReloaded/processes/mixed_measures_processes.py @@ -73,6 +73,7 @@ "MultiLabelPairwiseMeasures", ] +list_distance = ['masd','assd','hd','hd_perc'] class MixedLocSegPairwiseMeasure(object): """ @@ -704,8 +705,9 @@ def __init__( self.connectivity_type = connectivity_type ndim = 0 self.pixdim = pixdim + self.squeeze_ref_and_pred_to_size() if len(self.pred)>0: - ndim = np.asarray(self.pred[0]).ndim + ndim = np.asarray(self.ref[0]).ndim if len(self.pixdim) == 0 and ndim>0: self.pixdim = np.ones([ndim]) elif ndim>0: @@ -721,6 +723,16 @@ def __init__( if pred_proba is None or pred_proba[0] is None: self.flag_valid_proba = False + def squeeze_ref_and_pred_to_size(self): + for i,(p,r) in enumerate(zip(self.pred, self.ref)): + if np.size(np.asarray(p)) == np.size(np.asarray(r)) and np.asarray(p).ndim != np.asarray(r).ndim: + warnings.warn("There is 
a dimensional mismatch between pred and ref despite same size") + p = np.squeeze(np.asarray(p)) + r = np.squeeze(np.asarray(r)) + self.pred[i] = p + self.ref[i] = r + return + def per_label_dict(self): list_bin = [] list_mt = [] @@ -764,6 +776,16 @@ def per_label_dict(self): dict_bin = BPM.to_dict_meas() dict_bin["label"] = lab dict_bin["case"] = name + dict_bin["worse_dist"] = BPM.worse_dist + if any(x in self.measures_binary for x in list_distance): + dict_bin["worse_dist"] = BPM.worse_dist + dict_bin["check_empty"] = "None" + if BPM.flag_empty_pred and BPM.flag_empty_ref: + dict_bin["check_empty"] = "Both" + elif BPM.flag_empty_ref: + dict_bin["check_empty"] = "Ref" + elif BPM.flag_empty_pred: + dict_bin["check_empty"] = "Pred" list_bin.append(dict_bin) if self.flag_valid_proba and len(self.measures_mt)>0: PPM = ProbabilityPairwiseMeasures( @@ -772,6 +794,7 @@ def per_label_dict(self): measures=self.measures_mt, dict_args=self.dict_args, ) + dict_mt = PPM.to_dict_meas() dict_mt["label"] = lab dict_mt["case"] = name diff --git a/MetricsReloaded/processes/overall_process.py b/MetricsReloaded/processes/overall_process.py index 377af25..0e4a472 100644 --- a/MetricsReloaded/processes/overall_process.py +++ b/MetricsReloaded/processes/overall_process.py @@ -227,6 +227,8 @@ MAX = 1000 +LIST_DISTANCE = ['hd','masd','assd','hd_perc'] + WORSE = { "ap": 0, "auroc": 0, @@ -257,6 +259,38 @@ "nsd": 0, } +BEST = { + "ap": 1, + "auroc": 1, + "froc": 1, + "sens@spec": 1, + "sens@ppv": 1, + "spec@sens": 1, + "fppi@sens": 0, + "ppv@sens": 1, + "sens@fppi": 1, + "fbeta": 1, + "ec":0, + "accuracy": 1, + "ba": 1, + "lr+": 1, + "youden_ind": 1, + "mcc": 1, + "wck": 1, + "cohens_kappa": 1, + "iou": 1, + "dsc": 1, + "cldice": 1, + "masd": 0, + "assd": 0, + "hd_perc": 0, + "hd": 0, + "boundary_iou": 1, + "nsd": 1, +} + +NAN_LIST = ["iou","dsc","fbeta","masd",'cldice','hd','hd_perc','assd','boundary_iou','nsd'] + class ProcessEvaluation(object): """ Performs the evaluation of the data 
stored in a pickled file according to all the measures, categories and choices of processing @@ -484,6 +518,39 @@ def process_data(self): self.resmt = df_resmt self.resmcc = df_resmcc self.rescal = df_rescal + self.create_mapping_column_nan_replaced_seg() + return + + def create_mapping_column_nan_replaced_seg(self): + """ + For each measure (segmentation) for which nan are possible + creates an additional column in which nans are replaced by value (worse or best according to situation + """ + list_to_map = [] + for x in self.measures_boundary: + if x in NAN_LIST: + list_to_map.append(x) + for x in self.measures_overlap: + if x in NAN_LIST: + list_to_map.append(x) + for k in list_to_map: + self.resseg[k+'_nanrep'] = self.resseg[k] + + self.resseg[k+'_nanrep'] = np.where(np.logical_and(self.resseg[k].isna(),self.resseg['check_empty']=='Both') + ,BEST[k],self.resseg[k+'_nanrep']) + self.resseg[k+'_nanrep'] = np.where(np.logical_and(self.resseg[k+'_nanrep'].isna(),self.resseg['check_empty']=='Ref') + ,WORSE[k],self.resseg[k+'_nanrep']) + self.resseg[k+'_nanrep'] = np.where(np.logical_and(self.resseg[k+'_nanrep'].isna(),self.resseg['check_empty']=='Pred') + ,WORSE[k],self.resseg[k+'_nanrep']) + self.resseg[k+'_nanrep'] = np.where(np.logical_and(self.resseg[k].isna(),k in LIST_DISTANCE) + ,self.resseg['worse_dist'],self.resseg[k+'_nanrep']) + + return + + + + + def identify_empty_ref(self): return def complete_missing_cases(self): diff --git a/test/test_metrics/test_pairwise_measures.py b/test/test_metrics/test_pairwise_measures.py index 4ec79ab..abc2cbb 100644 --- a/test/test_metrics/test_pairwise_measures.py +++ b/test/test_metrics/test_pairwise_measures.py @@ -631,16 +631,16 @@ def test_mcc(): -def test_distance_empty(): - """ - Testing that output is 0 when reference and prediction empty for calculation of distance - """ - pred = np.zeros([14, 14]) - ref = np.zeros([14, 14]) - bpm = PM(pred, ref) - value_test = bpm.measured_distance() - expected_dist = (0, 0, 0, 0) 
- assert_allclose(value_test, expected_dist) +# def test_distance_empty(): +# """ +# Testing that output is 0 when reference and prediction empty for calculation of distance +# """ +# pred = np.zeros([14, 14]) +# ref = np.zeros([14, 14]) +# bpm = PM(pred, ref) +# value_test = bpm.measured_distance() +# expected_dist = (0, 0, 0, 0) +# assert_allclose(value_test, expected_dist) def test_fbeta(): """ @@ -854,10 +854,17 @@ def test_distance_empty_pred(): def test_distance_empty_pred_and_ref(): ppm1 = PM(pred29_1*0, ref29_1*0) hd, hd_perc, masd, assd = ppm1.measured_distance() - assert hd == 0 - assert hd_perc == 0 - assert masd == 0 - assert assd == 0 + assert np.isnan(hd) + assert np.isnan(hd_perc) + assert np.isnan(masd) + assert np.isnan(assd) + +def test_calculate_worse_dist(): + ppm_pix1 = PM(pred210_1, ref210) + ppm_pix12 = PM(pred210_1, ref210,pixdim=[1,2]) + assert_allclose(ppm_pix1.worse_dist,19.80,atol=0.01) + assert_allclose(ppm_pix12.worse_dist,31.30,atol=0.01) + def test_boundary_iou(): """ @@ -876,9 +883,11 @@ def test_empty_ref_pred_nsd_biou(): pred_empty = np.zeros([14,14]) ppm_empty = PM(pred_empty, ref_empty) nsd = ppm_empty.normalised_surface_distance() - assert nsd == 1 + assert np.isnan(nsd) biou = ppm_empty.boundary_iou() - assert biou == 1 + assert np.isnan(biou) + cldsc = ppm_empty.centreline_dsc() + assert np.isnan(cldsc) def test_cldsc_s214(): @@ -941,7 +950,7 @@ def test_empty_reference(): spec = pm2.specificity() expected_fbeta = 1 - assert fbeta == expected_fbeta + assert np.isnan(fbeta) assert sens != sens # True if nan assert spec != spec # True if nan diff --git a/test/test_processes/test_overall_process.py b/test/test_processes/test_overall_process.py index 4eec024..8deac70 100644 --- a/test/test_processes/test_overall_process.py +++ b/test/test_processes/test_overall_process.py @@ -23,12 +23,21 @@ ref12 = ref1 + 2*ref2 pred12 = pred1 + 2*pred2 +pred_empty = np.zeros([21,21]) +ref_empty = np.zeros([21,21]) + data_init = {} 
data_init['pred_class'] = [pred1, pred2] data_init['ref_class'] = [ref1, ref2] data_init['list_values'] = [1] data_init['pred_prob'] = [None,None] +data_nan = {} +data_nan['pred_class'] = [pred1, pred1, pred_empty, pred1] +data_nan['ref_class'] = [ref12, ref1, ref1, ref_empty] +data_nan['list_values'] = [1,2] +data_nan['pred_prob'] = [None,None,None,None] + data_miss = {} data_miss['pred_class'] = [pred1, pred2] data_miss['ref_class'] = [ref1, ref2] @@ -48,17 +57,23 @@ data_agg2['list_values'] = [1,2] data_agg2['pred_prob'] = [None,None] +def test_op_nanreplacement(): + pe = PE(data_nan,'SemS',measures_overlap=['fbeta'],measures_boundary=['masd']) + print(pe.resseg, pe.resseg.columns,pe.resseg[['fbeta','fbeta_nanrep','masd','masd_nanrep','check_empty','label','case']]) + assert_allclose(pe.resseg.shape,[8,10]) + df_test = pe.resseg + assert_allclose(df_test[(df_test['label']==1) & (df_test['case']==2)]['masd_nanrep'],29.69,atol=0.01) def test_op_aggregation(): pe = PE(data_init,'SemS',measures_overlap=['fbeta'],measures_boundary=['boundary_iou']) print(pe.grouped_lab) - assert_allclose(pe.grouped_lab.shape,[2,4]) + assert_allclose(pe.grouped_lab.shape,[2,8]) def test_op_refmissing(): pe = PE(data_miss,'SemS',measures_overlap=['fbeta'],measures_boundary=['boundary_iou']) print(pe.grouped_lab, pe.resseg) - assert_allclose(pe.grouped_lab.shape,[3,4]) + assert_allclose(pe.grouped_lab.shape,[3,8])