@@ -62,6 +62,40 @@ def _get_minintensity(qualifier):
6262
6363 return min_intensity , min_percent_intensity , min_tic_percent_intensity
6464
65+ def _get_intensity_mask (df , qualifier ):
66+ """
67+ Returns a boolean mask for intensity filtering, respecting >, <, = comparators.
68+ Falls back to > 0 for columns without a specific qualifier.
69+ """
70+ if qualifier is None :
71+ return (df ["i" ] > 0 ) & (df ["i_norm" ] > 0 ) & (df ["i_tic_norm" ] > 0 )
72+
73+ masks = []
74+ for qual_key , col , scale in [
75+ ("qualifierintensityvalue" , "i" , 1.0 ),
76+ ("qualifierintensitypercent" , "i_norm" , 100.0 ),
77+ ("qualifierintensityticpercent" , "i_tic_norm" , 100.0 ),
78+ ]:
79+ if qual_key in qualifier :
80+ val = float (qualifier [qual_key ]["value" ]) / scale
81+ comp = qualifier [qual_key ].get ("comparator" , "greaterthan" )
82+ if comp == "greaterthan" :
83+ if scale > 1.0 :
84+ val = min (val , 0.99 )
85+ masks .append (df [col ] > val )
86+ elif comp == "lessthan" :
87+ masks .append (df [col ] < val )
88+ else : # equal → minimum threshold (>= val), preserving historical semantics
89+ masks .append (df [col ] >= val )
90+ else :
91+ masks .append (df [col ] > 0 )
92+
93+ result = masks [0 ]
94+ for m in masks [1 :]:
95+ result = result & m
96+ return result
97+
98+
6599def _get_exclusion_flag (qualifiers ):
66100 if qualifiers is None :
67101 return False
@@ -199,30 +233,26 @@ def ms2prod_condition(condition, ms1_df, ms2_df, reference_conditions_register):
199233 if mz == "ANY" :
200234 # Checking defect options
201235 massdefect_min , massdefect_max = _get_massdefect_min (condition .get ("qualifiers" , None ))
202- ms2_filtered_df = ms2_df
236+ ms2_filtered_df = ms2_df . copy ()
203237 ms2_filtered_df ["mz_defect" ] = ms2_filtered_df ["mz" ] - ms2_filtered_df ["mz" ].astype (int )
204238
205- min_int , min_intpercent , min_tic_percent_intensity = _get_minintensity ( condition .get ("qualifiers" , None ))
239+ intensity_mask = _get_intensity_mask ( ms2_filtered_df , condition .get ("qualifiers" , None ))
206240
207241 ms2_filtered_df = ms2_filtered_df [
208- (ms2_filtered_df ["mz_defect" ] > massdefect_min ) &
242+ (ms2_filtered_df ["mz_defect" ] > massdefect_min ) &
209243 (ms2_filtered_df ["mz_defect" ] < massdefect_max ) &
210- (ms2_filtered_df ["i" ] > min_int ) &
211- (ms2_filtered_df ["i_norm" ] > min_intpercent ) &
212- (ms2_filtered_df ["i_tic_norm" ] > min_tic_percent_intensity )
244+ intensity_mask
213245 ]
214246 else :
215247 mz_tol = _get_mz_tolerance (condition .get ("qualifiers" , None ), mz )
216248 mz_min = mz - mz_tol
217249 mz_max = mz + mz_tol
218250
219- min_int , min_intpercent , min_tic_percent_intensity = _get_minintensity ( condition .get ("qualifiers" , None ))
251+ intensity_mask = _get_intensity_mask ( ms2_df , condition .get ("qualifiers" , None ))
220252
221- ms2_filtered_df = ms2_df [(ms2_df ["mz" ] > mz_min ) &
222- (ms2_df ["mz" ] < mz_max ) &
223- (ms2_df ["i" ] > min_int ) &
224- (ms2_df ["i_norm" ] > min_intpercent ) &
225- (ms2_df ["i_tic_norm" ] > min_tic_percent_intensity )]
253+ ms2_filtered_df = ms2_df [(ms2_df ["mz" ] > mz_min ) &
254+ (ms2_df ["mz" ] < mz_max ) &
255+ intensity_mask ]
226256
227257 # Setting the intensity match register
228258 _set_intensity_register (ms2_filtered_df , reference_conditions_register , condition )
@@ -283,31 +313,27 @@ def ms2nl_condition(condition, ms1_df, ms2_df, reference_conditions_register):
283313 if mz == "ANY" :
284314 # Checking defect options
285315 massdefect_min , massdefect_max = _get_massdefect_min (condition .get ("qualifiers" , None ))
286- ms2_filtered_df = ms2_df
316+ ms2_filtered_df = ms2_df . copy ()
287317 ms2_filtered_df ["mz_defect" ] = ms2_filtered_df ["mz" ] - ms2_filtered_df ["mz" ].astype (int )
288318
289- min_int , min_intpercent , min_tic_percent_intensity = _get_minintensity ( condition .get ("qualifiers" , None ))
319+ intensity_mask = _get_intensity_mask ( ms2_filtered_df , condition .get ("qualifiers" , None ))
290320
291321 ms2_filtered_df = ms2_filtered_df [
292- (ms2_filtered_df ["mz_defect" ] > massdefect_min ) &
322+ (ms2_filtered_df ["mz_defect" ] > massdefect_min ) &
293323 (ms2_filtered_df ["mz_defect" ] < massdefect_max ) &
294- (ms2_filtered_df ["i" ] > min_int ) &
295- (ms2_filtered_df ["i_norm" ] > min_intpercent ) &
296- (ms2_filtered_df ["i_tic_norm" ] > min_tic_percent_intensity )
324+ intensity_mask
297325 ]
298326 else :
299327 mz_tol = _get_mz_tolerance (condition .get ("qualifiers" , None ), mz ) #TODO: This is incorrect logic if it comes to PPM accuracy
300328 nl_min = mz - mz_tol
301329 nl_max = mz + mz_tol
302330
303- min_int , min_intpercent , min_tic_percent_intensity = _get_minintensity ( condition .get ("qualifiers" , None ))
331+ intensity_mask = _get_intensity_mask ( ms2_df , condition .get ("qualifiers" , None ))
304332
305333 ms2_filtered_df = ms2_df [
306- ((ms2_df ["precmz" ] - ms2_df ["mz" ]) > nl_min ) &
334+ ((ms2_df ["precmz" ] - ms2_df ["mz" ]) > nl_min ) &
307335 ((ms2_df ["precmz" ] - ms2_df ["mz" ]) < nl_max ) &
308- (ms2_df ["i" ] > min_int ) &
309- (ms2_df ["i_norm" ] > min_intpercent ) &
310- (ms2_df ["i_tic_norm" ] > min_tic_percent_intensity )
336+ intensity_mask
311337 ]
312338
313339 # Setting the intensity match register
@@ -440,17 +466,15 @@ def ms1_condition(condition, ms1_df, ms2_df, reference_conditions_register, ms1_
440466 if mz == "ANY" :
441467 # Checking defect options
442468 massdefect_min , massdefect_max = _get_massdefect_min (condition .get ("qualifiers" , None ))
443- ms1_filtered_df = ms1_df
469+ ms1_filtered_df = ms1_df . copy ()
444470 ms1_filtered_df ["mz_defect" ] = ms1_filtered_df ["mz" ] - ms1_filtered_df ["mz" ].astype (int )
445471
446- min_int , min_intpercent , min_tic_percent_intensity = _get_minintensity ( condition .get ("qualifiers" , None ))
472+ intensity_mask = _get_intensity_mask ( ms1_filtered_df , condition .get ("qualifiers" , None ))
447473
448474 ms1_filtered_df = ms1_filtered_df [
449- (ms1_filtered_df ["mz_defect" ] > massdefect_min ) &
475+ (ms1_filtered_df ["mz_defect" ] > massdefect_min ) &
450476 (ms1_filtered_df ["mz_defect" ] < massdefect_max ) &
451- (ms1_filtered_df ["i" ] > min_int ) &
452- (ms1_filtered_df ["i_norm" ] > min_intpercent ) &
453- (ms1_filtered_df ["i_tic_norm" ] > min_tic_percent_intensity )
477+ intensity_mask
454478 ]
455479 else :
456480 # Checking defect options
@@ -460,23 +484,21 @@ def ms1_condition(condition, ms1_df, ms2_df, reference_conditions_register, ms1_
460484 mz_min = mz - mz_tol
461485 mz_max = mz + mz_tol
462486
463- min_int , min_intpercent , min_tic_percent_intensity = _get_minintensity (condition .get ("qualifiers" , None ))
464-
465487 otherscan_qualifier = _get_otherscan_qualifier (condition .get ("qualifiers" , None ))
466488
467489 if otherscan_qualifier is None :
490+ intensity_mask = _get_intensity_mask (ms1_df , condition .get ("qualifiers" , None ))
491+
468492 ms1_filtered_df = ms1_df [
469- (ms1_df ["mz" ] > mz_min ) &
470- (ms1_df ["mz" ] < mz_max ) &
471- (ms1_df ["i" ] > min_int ) &
472- (ms1_df ["i_norm" ] > min_intpercent ) &
473- (ms1_df ["i_tic_norm" ] > min_tic_percent_intensity )]
493+ (ms1_df ["mz" ] > mz_min ) &
494+ (ms1_df ["mz" ] < mz_max ) &
495+ intensity_mask ]
474496
475497 if massdefect_min > 0 or massdefect_max < 1 :
476498 ms1_filtered_df ["mz_defect" ] = ms1_filtered_df ["mz" ] - ms1_filtered_df ["mz" ].astype (int )
477-
499+
478500 ms1_filtered_df = ms1_filtered_df [
479- (ms1_filtered_df ["mz_defect" ] > massdefect_min ) &
501+ (ms1_filtered_df ["mz_defect" ] > massdefect_min ) &
480502 (ms1_filtered_df ["mz_defect" ] < massdefect_max )
481503 ]
482504 else :
@@ -485,33 +507,25 @@ def ms1_condition(condition, ms1_df, ms2_df, reference_conditions_register, ms1_
485507
486508 scans_to_keep = []
487509
510+ intensity_mask_original = _get_intensity_mask (ms1_original_df , condition .get ("qualifiers" , None ))
511+
488512 for scan , row in grouped_df .iterrows ():
489513 current_scan_rt = row ["rt" ]
490-
514+
491515 min_original_rt = current_scan_rt - otherscan_qualifier ["min" ]
492516 max_original_rt = current_scan_rt + otherscan_qualifier ["max" ]
493517
494- print ("RT RANGE" , min_original_rt , max_original_rt )
495-
496- print ("mz_min" , mz_min )
497- print ("mz_max" , mz_max )
498- print ("min_int" , min_int )
499- print ("min_intpercent" , min_intpercent )
500- print ("min_tic_percent_intensity" , min_tic_percent_intensity )
501-
502518 ms1_original_filtered_df = ms1_original_df [
503- (ms1_original_df ["mz" ] > mz_min ) &
504- (ms1_original_df ["mz" ] < mz_max ) &
505- (ms1_original_df ["i" ] > min_int ) &
506- (ms1_original_df ["i_norm" ] > min_intpercent ) &
507- (ms1_original_df ["i_tic_norm" ] > min_tic_percent_intensity ) &
519+ (ms1_original_df ["mz" ] > mz_min ) &
520+ (ms1_original_df ["mz" ] < mz_max ) &
521+ intensity_mask_original &
508522 (ms1_original_df ["rt" ] > min_original_rt ) &
509523 (ms1_original_df ["rt" ] < max_original_rt )]
510-
524+
511525 if len (ms1_original_filtered_df ) > 0 :
512526 # This means, the current scan we're considering is a scan that is valid to keep
513527 scans_to_keep .append (scan )
514-
528+
515529 # Lets filter the ms1_filtered to only the scans we want to keep
516530 ms1_filtered_df = ms1_df [ms1_df ["scan" ].isin (scans_to_keep )]
517531
@@ -570,39 +584,35 @@ def ms1_filter(condition, ms1_df):
570584 if mz == "ANY" :
571585 # Checking defect options
572586 massdefect_min , massdefect_max = _get_massdefect_min (condition .get ("qualifiers" , None ))
573- ms1_filtered_df = ms1_df
587+ ms1_filtered_df = ms1_df . copy ()
574588 ms1_filtered_df ["mz_defect" ] = ms1_filtered_df ["mz" ] - ms1_filtered_df ["mz" ].astype (int )
575589
576- min_int , min_intpercent , min_tic_percent_intensity = _get_minintensity ( condition .get ("qualifiers" , None ))
590+ intensity_mask = _get_intensity_mask ( ms1_filtered_df , condition .get ("qualifiers" , None ))
577591
578592 ms1_filtered_df = ms1_filtered_df [
579- (ms1_filtered_df ["mz_defect" ] > massdefect_min ) &
593+ (ms1_filtered_df ["mz_defect" ] > massdefect_min ) &
580594 (ms1_filtered_df ["mz_defect" ] < massdefect_max ) &
581- (ms1_filtered_df ["i" ] > min_int ) &
582- (ms1_filtered_df ["i_norm" ] > min_intpercent ) &
583- (ms1_filtered_df ["i_tic_norm" ] > min_tic_percent_intensity )
595+ intensity_mask
584596 ]
585597 else :
586598 # Checking defect options
587599 massdefect_min , massdefect_max = _get_massdefect_min (condition .get ("qualifiers" , None ))
588-
600+
589601 mz_tol = _get_mz_tolerance (condition .get ("qualifiers" , None ), mz )
590602 mz_min = mz - mz_tol
591603 mz_max = mz + mz_tol
592604
593- min_int , min_intpercent , min_tic_percent_intensity = _get_minintensity ( condition .get ("qualifiers" , None ))
605+ intensity_mask = _get_intensity_mask ( ms1_df , condition .get ("qualifiers" , None ))
594606 ms1_filtered_df = ms1_df [
595- (ms1_df ["mz" ] > mz_min ) &
596- (ms1_df ["mz" ] < mz_max ) &
597- (ms1_df ["i" ] > min_int ) &
598- (ms1_df ["i_norm" ] > min_intpercent ) &
599- (ms1_df ["i_tic_norm" ] > min_tic_percent_intensity )]
607+ (ms1_df ["mz" ] > mz_min ) &
608+ (ms1_df ["mz" ] < mz_max ) &
609+ intensity_mask ]
600610
601611 if massdefect_min > 0 or massdefect_max < 1 :
602612 ms1_filtered_df ["mz_defect" ] = ms1_filtered_df ["mz" ] - ms1_filtered_df ["mz" ].astype (int )
603613
604614 ms1_filtered_df = ms1_filtered_df [
605- (ms1_filtered_df ["mz_defect" ] > massdefect_min ) &
615+ (ms1_filtered_df ["mz_defect" ] > massdefect_min ) &
606616 (ms1_filtered_df ["mz_defect" ] < massdefect_max )
607617 ]
608618
0 commit comments