@@ -199,107 +199,128 @@ pub fn compute_diann_features_impl(
199199 let smoothed_best = smooth_3pt ( & best_trace) ;
200200
201201 // Step 3: Compute correlations for ALL fragments vs smoothed best.
202- let mut all_correlations: Vec < ( usize , f64 ) > = Vec :: with_capacity ( n_frags) ;
202+ // Compute BOTH strategies — they provide complementary information:
203+ // - overlap_only: accurate co-elution signal for observed data
204+ // - fill_zero: penalizes missing fragments (informative for scoring)
205+ let mut corr_overlap: Vec < ( usize , f64 ) > = Vec :: with_capacity ( n_frags) ;
206+ let mut corr_fillzero: Vec < ( usize , f64 ) > = Vec :: with_capacity ( n_frags) ;
203207 for f in 0 ..n_frags {
204- let r = match na_strategy {
205- NaStrategy :: OverlapOnly => {
206- // Only use RT points where both fragments have observed values
207- let mut a_vals = Vec :: new ( ) ;
208- let mut b_vals = Vec :: new ( ) ;
209- for r in 0 ..n_rts {
210- let best_val = matrix[ r] [ best_frag] ;
211- let frag_val = matrix[ r] [ f] ;
212- if !best_val. is_nan ( ) && !frag_val. is_nan ( ) {
213- a_vals. push ( smoothed_best[ r] ) ;
214- b_vals. push ( frag_val) ;
215- }
216- }
217- if a_vals. len ( ) >= 2 {
218- pearson_1d_impl ( & a_vals, & b_vals)
219- } else {
220- 0.0
208+ // Overlap-only: correlate at RTs where both fragments are observed
209+ let r_overlap = {
210+ let mut a_vals = Vec :: new ( ) ;
211+ let mut b_vals = Vec :: new ( ) ;
212+ for r in 0 ..n_rts {
213+ if !matrix[ r] [ best_frag] . is_nan ( ) && !matrix[ r] [ f] . is_nan ( ) {
214+ a_vals. push ( smoothed_best[ r] ) ;
215+ b_vals. push ( matrix[ r] [ f] ) ;
221216 }
222217 }
223- NaStrategy :: FillZero => {
224- // Fill NaN with 0 and use all RT points
225- let frag_trace = column_filled ( & matrix , f ) ;
226- pearson_1d_impl ( & smoothed_best , & frag_trace )
218+ if a_vals . len ( ) >= 2 {
219+ pearson_1d_impl ( & a_vals , & b_vals )
220+ } else {
221+ 0.0
227222 }
228223 } ;
229- all_correlations. push ( ( f, r) ) ;
224+ corr_overlap. push ( ( f, r_overlap) ) ;
225+
226+ // Fill-zero: fill NaN with 0 and use all RT points
227+ let frag_trace = column_filled ( & matrix, f) ;
228+ let r_fillzero = pearson_1d_impl ( & smoothed_best, & frag_trace) ;
229+ corr_fillzero. push ( ( f, r_fillzero) ) ;
230230 }
231231
232+ // Use the configured strategy as the "primary" correlations for derived features
233+ let all_correlations = match na_strategy {
234+ NaStrategy :: OverlapOnly => & corr_overlap,
235+ NaStrategy :: FillZero => & corr_fillzero,
236+ } ;
237+
232238 // Step 4: Top-N fragments by intensity
233239 let top_n_frags = find_top_n_fragments ( & matrix, n_frags, top_n) ;
234240 let top_n_ext_frags = find_top_n_fragments ( & matrix, n_frags, top_n_extended) ;
235241
236- // === Feature Group 1: Pearson correlations top-N (extended) ===
237- for i in 0 ..top_n_extended {
238- let val = if i < top_n_ext_frags. len ( ) {
239- let frag_idx = top_n_ext_frags[ i] ;
240- all_correlations
241- . iter ( )
242- . find ( |& & ( idx, _) | idx == frag_idx)
243- . map ( |& ( _, r) | if r. is_nan ( ) { 0.0 } else { r } )
244- . unwrap_or ( 0.0 )
245- } else {
246- f64:: NAN
247- } ;
248- features. insert ( format ! ( "diann_pearson_correlations_top_12_{i}" ) , val) ;
249- }
242+ // === Emit correlation features for BOTH strategies ===
243+ // The primary strategy (configured) gets the standard feature names.
244+ // The secondary strategy gets "_alt" suffixed names.
245+ let ( primary_corrs, secondary_corrs, alt_suffix) = match na_strategy {
246+ NaStrategy :: OverlapOnly => ( & corr_overlap, & corr_fillzero, "_fz" ) ,
247+ NaStrategy :: FillZero => ( & corr_fillzero, & corr_overlap, "_ov" ) ,
248+ } ;
249+
250+ // Helper: get correlation for fragment index from a correlation list
251+ let get_corr = |corrs : & [ ( usize , f64 ) ] , frag_idx : usize | -> f64 {
252+ corrs
253+ . iter ( )
254+ . find ( |& & ( i, _) | i == frag_idx)
255+ . map ( |& ( _, r) | if r. is_nan ( ) { 0.0 } else { r } )
256+ . unwrap_or ( 0.0 )
257+ } ;
258+
259+ // Emit top-N correlations for both strategies
260+ for ( corrs, suffix) in [ ( primary_corrs, "" ) , ( secondary_corrs, alt_suffix) ] {
261+ for i in 0 ..top_n_extended {
262+ let val = if i < top_n_ext_frags. len ( ) {
263+ get_corr ( corrs, top_n_ext_frags[ i] )
264+ } else {
265+ f64:: NAN
266+ } ;
267+ features. insert (
268+ format ! ( "diann_pearson_correlations_top_12_{i}{suffix}" ) ,
269+ val,
270+ ) ;
271+ }
250272
251- // === Feature Group 1: Sum of correlations (top-N) ===
252- let top_corr_sum: f64 = top_n_frags
253- . iter ( )
254- . map ( |& idx| {
255- all_correlations
256- . iter ( )
257- . find ( |& & ( i, _) | i == idx)
258- . map ( |& ( _, r) | if r. is_nan ( ) { 0.0 } else { r } )
259- . unwrap_or ( 0.0 )
260- } )
261- . sum ( ) ;
262- features. insert ( "diann_sum_correlations_mass_accuracy" . into ( ) , top_corr_sum) ;
273+ // Sum of top-N correlations
274+ let top_corr_sum: f64 = top_n_frags. iter ( ) . map ( |& idx| get_corr ( corrs, idx) ) . sum ( ) ;
275+ features. insert (
276+ format ! ( "diann_sum_correlations_mass_accuracy{suffix}" ) ,
277+ top_corr_sum,
278+ ) ;
279+ }
263280
264- // === Feature Group 1: Remaining fragment correlations ===
281+ // === Feature Group 1: Remaining + b- fragment correlations (both strategies) ===
265282 let top_set: std:: collections:: HashSet < usize > = top_n_frags. iter ( ) . copied ( ) . collect ( ) ;
266- let remaining_sum: f64 = all_correlations
267- . iter ( )
268- . filter ( |& & ( idx, _) | !top_set. contains ( & idx) )
269- . map ( |& ( _, r) | if r. is_nan ( ) { 0.0 } else { r } )
270- . sum ( ) ;
271- let remaining_count = n_frags. saturating_sub ( top_n_frags. len ( ) ) ;
272- features. insert ( "diann_remaining_fragments_correlations" . into ( ) , remaining_sum) ;
273- features. insert (
274- "diann_remaining_fragments_mean" . into ( ) ,
275- if remaining_count > 0 {
276- remaining_sum / remaining_count as f64
277- } else {
278- 0.0
279- } ,
280- ) ;
281283
282- // === Feature Group 1: Best b-fragments correlation ===
283- let mut b_corrs: Vec < f64 > = Vec :: new ( ) ;
284- for ( idx, & ( frag_idx, r) ) in all_correlations. iter ( ) . enumerate ( ) {
285- if idx < fragment_names. len ( ) && fragment_names[ frag_idx] . starts_with ( 'b' ) {
286- b_corrs. push ( if r. is_nan ( ) { 0.0 } else { r } ) ;
284+ for ( corrs, suffix) in [ ( primary_corrs, "" ) , ( secondary_corrs, alt_suffix) ] {
285+ let remaining_sum: f64 = corrs
286+ . iter ( )
287+ . filter ( |& & ( idx, _) | !top_set. contains ( & idx) )
288+ . map ( |& ( _, r) | if r. is_nan ( ) { 0.0 } else { r } )
289+ . sum ( ) ;
290+ let remaining_count = n_frags. saturating_sub ( top_n_frags. len ( ) ) ;
291+ features. insert (
292+ format ! ( "diann_remaining_fragments_correlations{suffix}" ) ,
293+ remaining_sum,
294+ ) ;
295+ features. insert (
296+ format ! ( "diann_remaining_fragments_mean{suffix}" ) ,
297+ if remaining_count > 0 {
298+ remaining_sum / remaining_count as f64
299+ } else {
300+ 0.0
301+ } ,
302+ ) ;
303+
304+ let mut b_corrs_vec: Vec < f64 > = Vec :: new ( ) ;
305+ for & ( frag_idx, r) in corrs. iter ( ) {
306+ if frag_idx < fragment_names. len ( ) && fragment_names[ frag_idx] . starts_with ( 'b' ) {
307+ b_corrs_vec. push ( if r. is_nan ( ) { 0.0 } else { r } ) ;
308+ }
287309 }
310+ b_corrs_vec. sort_by ( |a, b| b. partial_cmp ( a) . unwrap ( ) ) ;
311+ let best_b_sum: f64 = b_corrs_vec. iter ( ) . take ( 3 ) . sum ( ) ;
312+ features. insert (
313+ format ! ( "diann_best_b_fragments_correlation{suffix}" ) ,
314+ best_b_sum,
315+ ) ;
288316 }
289- b_corrs. sort_by ( |a, b| b. partial_cmp ( a) . unwrap ( ) ) ;
290- let best_b_sum: f64 = b_corrs. iter ( ) . take ( 3 ) . sum ( ) ;
291- features. insert ( "diann_best_b_fragments_correlation" . into ( ) , best_b_sum) ;
292317
293318 // === Feature Group 4: Weighted AUC ===
294319 let mut weighted_auc = 0.0 ;
295320 for & frag_idx in & top_n_frags {
296321 let frag_trace = column_filled ( & matrix, frag_idx) ;
297322 let auc = trapezoid_auc ( & sorted_rts, & frag_trace) ;
298- let corr = all_correlations
299- . iter ( )
300- . find ( |& & ( i, _) | i == frag_idx)
301- . map ( |& ( _, r) | if r. is_nan ( ) { 0.0 } else { r. abs ( ) } )
302- . unwrap_or ( 0.0 ) ;
323+ let corr = get_corr ( all_correlations, frag_idx) . abs ( ) ;
303324 weighted_auc += auc * corr;
304325 }
305326 features. insert (
0 commit comments