Skip to content

Commit b96650e

Browse files
committed
feat: add Pearson correlation as regression fitness function (fit: pearson)
1 parent 96505da commit b96650e

4 files changed

Lines changed: 57 additions & 6 deletions

File tree

src/data.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1153,6 +1153,7 @@ impl Data {
11531153
let is_regression = matches!(
11541154
param.general.fit,
11551155
crate::param::FitFunction::spearman
1156+
| crate::param::FitFunction::pearson
11561157
| crate::param::FitFunction::rmse
11571158
| crate::param::FitFunction::mutual_information
11581159
);

src/param.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ pub enum FitFunction {
2828
g_mean,
2929
/// Spearman rank correlation (regression mode)
3030
spearman,
31+
/// Pearson linear correlation coefficient (regression mode)
32+
pearson,
3133
/// Root Mean Squared Error (regression mode, negated so higher = better)
3234
rmse,
3335
/// Mutual Information (regression/classification)
@@ -79,6 +81,7 @@ pub enum FbmCIMethod {
7981
agresti_coull,
8082
/// Clopper-Pearson exact interval via Beta distribution quantiles.
8183
/// Conservative (guaranteed ≥ nominal coverage), widest intervals. Clopper & Pearson (1934).
84+
/// Note: this is a confidence-interval method; it is unrelated to the Pearson correlation used by `FitFunction::pearson`.
8285
clopper_pearson,
8386
}
8487

src/population.rs

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,10 @@ impl Population {
340340
if param.general.bias_penalty != 0.0
341341
&& !matches!(
342342
param.general.fit,
343-
FitFunction::spearman | FitFunction::rmse | FitFunction::mutual_information
343+
FitFunction::spearman
344+
| FitFunction::pearson
345+
| FitFunction::rmse
346+
| FitFunction::mutual_information
344347
)
345348
{
346349
if i.cls.sensitivity < 0.5 {
@@ -447,9 +450,10 @@ impl Population {
447450
let penalties = match param.general.fit {
448451
FitFunction::sensitivity => Some([param.general.fr_penalty, 1.0]),
449452
FitFunction::specificity => Some([1.0, param.general.fr_penalty]),
450-
FitFunction::spearman | FitFunction::rmse | FitFunction::mutual_information => {
451-
None
452-
}
453+
FitFunction::spearman
454+
| FitFunction::pearson
455+
| FitFunction::rmse
456+
| FitFunction::mutual_information => None,
453457
_ => None,
454458
};
455459
match param.general.fit {
@@ -513,6 +517,9 @@ impl Population {
513517
FitFunction::spearman => {
514518
i.fit = crate::utils::spearman_correlation(&scores, &data.y);
515519
}
520+
FitFunction::pearson => {
521+
i.fit = crate::utils::pearson_correlation(&scores, &data.y);
522+
}
516523
FitFunction::rmse => {
517524
i.fit = crate::utils::neg_rmse(&scores, &data.y);
518525
}

src/utils.rs

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -706,7 +706,10 @@ pub fn compute_roc_and_metrics_from_value(
706706
FitFunction::ppv => ppv(tp_init, fp_init),
707707
FitFunction::g_mean => g_mean(sens_init, spec_init),
708708
// Regression metrics — threshold optimization not applicable
709-
FitFunction::spearman | FitFunction::rmse | FitFunction::mutual_information => 0.0,
709+
FitFunction::spearman
710+
| FitFunction::pearson
711+
| FitFunction::rmse
712+
| FitFunction::mutual_information => 0.0,
710713
};
711714

712715
if obj_init > best_objective {
@@ -765,7 +768,10 @@ pub fn compute_roc_and_metrics_from_value(
765768
FitFunction::npv => npv(tn, fn_count),
766769
FitFunction::ppv => ppv(tp, fp),
767770
FitFunction::g_mean => g_mean(sensitivity, specificity),
768-
FitFunction::spearman | FitFunction::rmse | FitFunction::mutual_information => 0.0,
771+
FitFunction::spearman
772+
| FitFunction::pearson
773+
| FitFunction::rmse
774+
| FitFunction::mutual_information => 0.0,
769775
};
770776

771777
if objective > best_objective {
@@ -956,6 +962,40 @@ pub fn spearman_correlation(x: &[f64], y: &[f64]) -> f64 {
956962
cov / (var_x.sqrt() * var_y.sqrt())
957963
}
958964

965+
/// Compute the Pearson correlation coefficient between two vectors.
///
/// Returns r ∈ [-1, 1]. Higher = stronger linear association.
/// Unlike Spearman (rank-based), Pearson measures linear correlation on raw values.
///
/// Returns 0.0 whenever the coefficient is undefined: fewer than two
/// observations, zero variance in either input, or a non-finite result
/// (NaN/infinite inputs, or overflow while accumulating the sums).
///
/// # Panics
///
/// Panics if `x` and `y` have different lengths.
pub fn pearson_correlation(x: &[f64], y: &[f64]) -> f64 {
    assert_eq!(x.len(), y.len(), "Pearson: vectors must have equal length");
    let n = x.len();
    if n < 2 {
        return 0.0;
    }

    let n_f = n as f64;
    let mean_x = x.iter().sum::<f64>() / n_f;
    let mean_y = y.iter().sum::<f64>() / n_f;

    // Accumulate covariance and both variances in a single pass, in the same
    // element order as a plain index loop would.
    let (cov, var_x, var_y) = x.iter().zip(y).fold(
        (0.0_f64, 0.0_f64, 0.0_f64),
        |(cov, var_x, var_y), (&xi, &yi)| {
            let dx = xi - mean_x;
            let dy = yi - mean_y;
            (cov + dx * dy, var_x + dx * dx, var_y + dy * dy)
        },
    );

    // A constant input has no defined correlation.
    if var_x == 0.0 || var_y == 0.0 {
        return 0.0;
    }

    let r = cov / (var_x.sqrt() * var_y.sqrt());

    // NaN or infinite inputs (or overflow in the sums) would otherwise leak a
    // non-finite value to the caller; treat them like the other undefined cases.
    if !r.is_finite() {
        return 0.0;
    }

    // Rounding can push |r| marginally past 1; keep the documented range.
    r.clamp(-1.0, 1.0)
}
998+
959999
/// Compute negative Root Mean Squared Error between predictions and targets.
9601000
///
9611001
/// Returns -RMSE so that higher = better (consistent with other fit functions).

0 commit comments

Comments
 (0)