Skip to content

Commit 4772e7a

Browse files
committed
supporting less-than and greater-than in intensity
1 parent 6ef71aa commit 4772e7a

4 files changed

Lines changed: 177 additions & 80 deletions

File tree

massql/msql_engine.py

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from massql import msql_parser
1111
from massql import msql_fileloading
1212
from massql import msql_engine_filters
13-
from massql.msql_engine_filters import _get_mz_tolerance, _get_minintensity
13+
from massql.msql_engine_filters import _get_mz_tolerance, _get_minintensity, _get_intensity_mask
1414

1515
math_parser = Parser()
1616
console = logging.StreamHandler()
@@ -224,11 +224,8 @@ def _evalute_variable_query(parsed_dict, input_filename,
224224

225225
# Filtering MS1 peaks only to consider contention for X
226226
if condition["type"] == "ms1mzcondition":
227-
min_int, min_intpercent, min_tic_percent_intensity = _get_minintensity(condition.get("qualifiers", None))
228-
variable_x_ms1_df = ms1_df[
229-
(ms1_df["i"] > min_int) &
230-
(ms1_df["i_norm"] > min_intpercent) &
231-
(ms1_df["i_tic_norm"] > min_tic_percent_intensity)]
227+
intensity_mask = _get_intensity_mask(ms1_df, condition.get("qualifiers", None))
228+
variable_x_ms1_df = ms1_df[intensity_mask]
232229

233230
# TODO: Do this for other types of variables
234231

@@ -546,13 +543,11 @@ def _executeconditions_query(parsed_dict, input_filename, ms1_input_df=None, ms2
546543
mz_min = mz - mz_tol
547544
mz_max = mz + mz_tol
548545

549-
min_int, min_intpercent, min_tic_percent_intensity = _get_minintensity(condition.get("qualifiers", None))
546+
intensity_mask = _get_intensity_mask(ms2_df, condition.get("qualifiers", None))
550547

551-
ms2_df = ms2_df[(ms2_df["mz"] > mz_min) &
552-
(ms2_df["mz"] < mz_max) &
553-
(ms2_df["i"] > min_int) &
554-
(ms2_df["i_norm"] > min_intpercent) &
555-
(ms2_df["i_tic_norm"] > min_tic_percent_intensity)]
548+
ms2_df = ms2_df[(ms2_df["mz"] > mz_min) &
549+
(ms2_df["mz"] < mz_max) &
550+
intensity_mask]
556551

557552
if "comment" in parsed_dict:
558553
ms1_df["comment"] = parsed_dict["comment"]

massql/msql_engine_filters.py

Lines changed: 78 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,40 @@ def _get_minintensity(qualifier):
6262

6363
return min_intensity, min_percent_intensity, min_tic_percent_intensity
6464

65+
def _get_intensity_mask(df, qualifier):
    """
    Build a boolean row mask for intensity filtering on a peak dataframe.

    Three intensity columns are always constrained and the per-column masks
    are AND-ed together:

    * ``i``          <- ``qualifierintensityvalue``      (value used as-is)
    * ``i_norm``     <- ``qualifierintensitypercent``    (value divided by 100)
    * ``i_tic_norm`` <- ``qualifierintensityticpercent`` (value divided by 100)

    For a column whose qualifier is present, the qualifier's ``"comparator"``
    entry (defaulting to ``"greaterthan"`` when missing) selects the test:

    * ``"greaterthan"`` -> ``column > value``
    * ``"lessthan"``    -> ``column < value``
    * anything else (e.g. ``"equal"``) -> ``column >= value``, i.e. the value
      acts as an inclusive minimum threshold.

    A column without a qualifier falls back to ``column > 0``.

    Args:
        df: dataframe providing the ``i``, ``i_norm`` and ``i_tic_norm``
            columns.
        qualifier: dict of parsed qualifiers keyed by qualifier name, each
            entry holding at least ``"value"``; ``None`` is treated the same
            as an empty dict.

    Returns:
        Boolean mask aligned with ``df`` that can be used directly for
        indexing or combined with other masks via ``&``.
    """
    # (qualifier key, dataframe column, divisor converting the query value
    # into the unit stored in that column)
    intensity_columns = (
        ("qualifierintensityvalue", "i", 1.0),
        ("qualifierintensitypercent", "i_norm", 100.0),
        ("qualifierintensityticpercent", "i_tic_norm", 100.0),
    )

    # No qualifiers at all behaves exactly like "no intensity qualifier for
    # any column", so both cases share the per-column fallback below.
    if qualifier is None:
        qualifier = {}

    mask = None
    for qual_key, col, scale in intensity_columns:
        if qual_key in qualifier:
            entry = qualifier[qual_key]
            val = float(entry["value"]) / scale
            comp = entry.get("comparator", "greaterthan")

            if comp == "greaterthan":
                if scale > 1.0:
                    # Percent-style thresholds are capped at 0.99 -- presumably
                    # so that asking for 100% with a strict ">" can still match
                    # the most intense peak (TODO confirm intent).
                    val = min(val, 0.99)
                column_mask = df[col] > val
            elif comp == "lessthan":
                column_mask = df[col] < val
            else:
                # "equal" (and any unrecognised comparator) is an inclusive
                # minimum threshold, not an exact-match test.
                column_mask = df[col] >= val
        else:
            column_mask = df[col] > 0

        mask = column_mask if mask is None else mask & column_mask

    return mask
97+
98+
6599
def _get_exclusion_flag(qualifiers):
66100
if qualifiers is None:
67101
return False
@@ -199,30 +233,26 @@ def ms2prod_condition(condition, ms1_df, ms2_df, reference_conditions_register):
199233
if mz == "ANY":
200234
# Checking defect options
201235
massdefect_min, massdefect_max = _get_massdefect_min(condition.get("qualifiers", None))
202-
ms2_filtered_df = ms2_df
236+
ms2_filtered_df = ms2_df.copy()
203237
ms2_filtered_df["mz_defect"] = ms2_filtered_df["mz"] - ms2_filtered_df["mz"].astype(int)
204238

205-
min_int, min_intpercent, min_tic_percent_intensity = _get_minintensity(condition.get("qualifiers", None))
239+
intensity_mask = _get_intensity_mask(ms2_filtered_df, condition.get("qualifiers", None))
206240

207241
ms2_filtered_df = ms2_filtered_df[
208-
(ms2_filtered_df["mz_defect"] > massdefect_min) &
242+
(ms2_filtered_df["mz_defect"] > massdefect_min) &
209243
(ms2_filtered_df["mz_defect"] < massdefect_max) &
210-
(ms2_filtered_df["i"] > min_int) &
211-
(ms2_filtered_df["i_norm"] > min_intpercent) &
212-
(ms2_filtered_df["i_tic_norm"] > min_tic_percent_intensity)
244+
intensity_mask
213245
]
214246
else:
215247
mz_tol = _get_mz_tolerance(condition.get("qualifiers", None), mz)
216248
mz_min = mz - mz_tol
217249
mz_max = mz + mz_tol
218250

219-
min_int, min_intpercent, min_tic_percent_intensity = _get_minintensity(condition.get("qualifiers", None))
251+
intensity_mask = _get_intensity_mask(ms2_df, condition.get("qualifiers", None))
220252

221-
ms2_filtered_df = ms2_df[(ms2_df["mz"] > mz_min) &
222-
(ms2_df["mz"] < mz_max) &
223-
(ms2_df["i"] > min_int) &
224-
(ms2_df["i_norm"] > min_intpercent) &
225-
(ms2_df["i_tic_norm"] > min_tic_percent_intensity)]
253+
ms2_filtered_df = ms2_df[(ms2_df["mz"] > mz_min) &
254+
(ms2_df["mz"] < mz_max) &
255+
intensity_mask]
226256

227257
# Setting the intensity match register
228258
_set_intensity_register(ms2_filtered_df, reference_conditions_register, condition)
@@ -283,31 +313,27 @@ def ms2nl_condition(condition, ms1_df, ms2_df, reference_conditions_register):
283313
if mz == "ANY":
284314
# Checking defect options
285315
massdefect_min, massdefect_max = _get_massdefect_min(condition.get("qualifiers", None))
286-
ms2_filtered_df = ms2_df
316+
ms2_filtered_df = ms2_df.copy()
287317
ms2_filtered_df["mz_defect"] = ms2_filtered_df["mz"] - ms2_filtered_df["mz"].astype(int)
288318

289-
min_int, min_intpercent, min_tic_percent_intensity = _get_minintensity(condition.get("qualifiers", None))
319+
intensity_mask = _get_intensity_mask(ms2_filtered_df, condition.get("qualifiers", None))
290320

291321
ms2_filtered_df = ms2_filtered_df[
292-
(ms2_filtered_df["mz_defect"] > massdefect_min) &
322+
(ms2_filtered_df["mz_defect"] > massdefect_min) &
293323
(ms2_filtered_df["mz_defect"] < massdefect_max) &
294-
(ms2_filtered_df["i"] > min_int) &
295-
(ms2_filtered_df["i_norm"] > min_intpercent) &
296-
(ms2_filtered_df["i_tic_norm"] > min_tic_percent_intensity)
324+
intensity_mask
297325
]
298326
else:
299327
mz_tol = _get_mz_tolerance(condition.get("qualifiers", None), mz) #TODO: This is incorrect logic if it comes to PPM accuracy
300328
nl_min = mz - mz_tol
301329
nl_max = mz + mz_tol
302330

303-
min_int, min_intpercent, min_tic_percent_intensity = _get_minintensity(condition.get("qualifiers", None))
331+
intensity_mask = _get_intensity_mask(ms2_df, condition.get("qualifiers", None))
304332

305333
ms2_filtered_df = ms2_df[
306-
((ms2_df["precmz"] - ms2_df["mz"]) > nl_min) &
334+
((ms2_df["precmz"] - ms2_df["mz"]) > nl_min) &
307335
((ms2_df["precmz"] - ms2_df["mz"]) < nl_max) &
308-
(ms2_df["i"] > min_int) &
309-
(ms2_df["i_norm"] > min_intpercent) &
310-
(ms2_df["i_tic_norm"] > min_tic_percent_intensity)
336+
intensity_mask
311337
]
312338

313339
# Setting the intensity match register
@@ -440,17 +466,15 @@ def ms1_condition(condition, ms1_df, ms2_df, reference_conditions_register, ms1_
440466
if mz == "ANY":
441467
# Checking defect options
442468
massdefect_min, massdefect_max = _get_massdefect_min(condition.get("qualifiers", None))
443-
ms1_filtered_df = ms1_df
469+
ms1_filtered_df = ms1_df.copy()
444470
ms1_filtered_df["mz_defect"] = ms1_filtered_df["mz"] - ms1_filtered_df["mz"].astype(int)
445471

446-
min_int, min_intpercent, min_tic_percent_intensity = _get_minintensity(condition.get("qualifiers", None))
472+
intensity_mask = _get_intensity_mask(ms1_filtered_df, condition.get("qualifiers", None))
447473

448474
ms1_filtered_df = ms1_filtered_df[
449-
(ms1_filtered_df["mz_defect"] > massdefect_min) &
475+
(ms1_filtered_df["mz_defect"] > massdefect_min) &
450476
(ms1_filtered_df["mz_defect"] < massdefect_max) &
451-
(ms1_filtered_df["i"] > min_int) &
452-
(ms1_filtered_df["i_norm"] > min_intpercent) &
453-
(ms1_filtered_df["i_tic_norm"] > min_tic_percent_intensity)
477+
intensity_mask
454478
]
455479
else:
456480
# Checking defect options
@@ -460,23 +484,21 @@ def ms1_condition(condition, ms1_df, ms2_df, reference_conditions_register, ms1_
460484
mz_min = mz - mz_tol
461485
mz_max = mz + mz_tol
462486

463-
min_int, min_intpercent, min_tic_percent_intensity = _get_minintensity(condition.get("qualifiers", None))
464-
465487
otherscan_qualifier = _get_otherscan_qualifier(condition.get("qualifiers", None))
466488

467489
if otherscan_qualifier is None:
490+
intensity_mask = _get_intensity_mask(ms1_df, condition.get("qualifiers", None))
491+
468492
ms1_filtered_df = ms1_df[
469-
(ms1_df["mz"] > mz_min) &
470-
(ms1_df["mz"] < mz_max) &
471-
(ms1_df["i"] > min_int) &
472-
(ms1_df["i_norm"] > min_intpercent) &
473-
(ms1_df["i_tic_norm"] > min_tic_percent_intensity)]
493+
(ms1_df["mz"] > mz_min) &
494+
(ms1_df["mz"] < mz_max) &
495+
intensity_mask]
474496

475497
if massdefect_min > 0 or massdefect_max < 1:
476498
ms1_filtered_df["mz_defect"] = ms1_filtered_df["mz"] - ms1_filtered_df["mz"].astype(int)
477-
499+
478500
ms1_filtered_df = ms1_filtered_df[
479-
(ms1_filtered_df["mz_defect"] > massdefect_min) &
501+
(ms1_filtered_df["mz_defect"] > massdefect_min) &
480502
(ms1_filtered_df["mz_defect"] < massdefect_max)
481503
]
482504
else:
@@ -485,33 +507,25 @@ def ms1_condition(condition, ms1_df, ms2_df, reference_conditions_register, ms1_
485507

486508
scans_to_keep = []
487509

510+
intensity_mask_original = _get_intensity_mask(ms1_original_df, condition.get("qualifiers", None))
511+
488512
for scan, row in grouped_df.iterrows():
489513
current_scan_rt = row["rt"]
490-
514+
491515
min_original_rt = current_scan_rt - otherscan_qualifier["min"]
492516
max_original_rt = current_scan_rt + otherscan_qualifier["max"]
493517

494-
print("RT RANGE", min_original_rt, max_original_rt)
495-
496-
print("mz_min", mz_min)
497-
print("mz_max", mz_max)
498-
print("min_int", min_int)
499-
print("min_intpercent", min_intpercent)
500-
print("min_tic_percent_intensity", min_tic_percent_intensity)
501-
502518
ms1_original_filtered_df = ms1_original_df[
503-
(ms1_original_df["mz"] > mz_min) &
504-
(ms1_original_df["mz"] < mz_max) &
505-
(ms1_original_df["i"] > min_int) &
506-
(ms1_original_df["i_norm"] > min_intpercent) &
507-
(ms1_original_df["i_tic_norm"] > min_tic_percent_intensity) &
519+
(ms1_original_df["mz"] > mz_min) &
520+
(ms1_original_df["mz"] < mz_max) &
521+
intensity_mask_original &
508522
(ms1_original_df["rt"] > min_original_rt) &
509523
(ms1_original_df["rt"] < max_original_rt)]
510-
524+
511525
if len(ms1_original_filtered_df) > 0:
512526
# This means, the current scan we're considering is a scan that is valid to keep
513527
scans_to_keep.append(scan)
514-
528+
515529
# Lets filter the ms1_filtered to only the scans we want to keep
516530
ms1_filtered_df = ms1_df[ms1_df["scan"].isin(scans_to_keep)]
517531

@@ -570,39 +584,35 @@ def ms1_filter(condition, ms1_df):
570584
if mz == "ANY":
571585
# Checking defect options
572586
massdefect_min, massdefect_max = _get_massdefect_min(condition.get("qualifiers", None))
573-
ms1_filtered_df = ms1_df
587+
ms1_filtered_df = ms1_df.copy()
574588
ms1_filtered_df["mz_defect"] = ms1_filtered_df["mz"] - ms1_filtered_df["mz"].astype(int)
575589

576-
min_int, min_intpercent, min_tic_percent_intensity = _get_minintensity(condition.get("qualifiers", None))
590+
intensity_mask = _get_intensity_mask(ms1_filtered_df, condition.get("qualifiers", None))
577591

578592
ms1_filtered_df = ms1_filtered_df[
579-
(ms1_filtered_df["mz_defect"] > massdefect_min) &
593+
(ms1_filtered_df["mz_defect"] > massdefect_min) &
580594
(ms1_filtered_df["mz_defect"] < massdefect_max) &
581-
(ms1_filtered_df["i"] > min_int) &
582-
(ms1_filtered_df["i_norm"] > min_intpercent) &
583-
(ms1_filtered_df["i_tic_norm"] > min_tic_percent_intensity)
595+
intensity_mask
584596
]
585597
else:
586598
# Checking defect options
587599
massdefect_min, massdefect_max = _get_massdefect_min(condition.get("qualifiers", None))
588-
600+
589601
mz_tol = _get_mz_tolerance(condition.get("qualifiers", None), mz)
590602
mz_min = mz - mz_tol
591603
mz_max = mz + mz_tol
592604

593-
min_int, min_intpercent, min_tic_percent_intensity = _get_minintensity(condition.get("qualifiers", None))
605+
intensity_mask = _get_intensity_mask(ms1_df, condition.get("qualifiers", None))
594606
ms1_filtered_df = ms1_df[
595-
(ms1_df["mz"] > mz_min) &
596-
(ms1_df["mz"] < mz_max) &
597-
(ms1_df["i"] > min_int) &
598-
(ms1_df["i_norm"] > min_intpercent) &
599-
(ms1_df["i_tic_norm"] > min_tic_percent_intensity)]
607+
(ms1_df["mz"] > mz_min) &
608+
(ms1_df["mz"] < mz_max) &
609+
intensity_mask]
600610

601611
if massdefect_min > 0 or massdefect_max < 1:
602612
ms1_filtered_df["mz_defect"] = ms1_filtered_df["mz"] - ms1_filtered_df["mz"].astype(int)
603613

604614
ms1_filtered_df = ms1_filtered_df[
605-
(ms1_filtered_df["mz_defect"] > massdefect_min) &
615+
(ms1_filtered_df["mz_defect"] > massdefect_min) &
606616
(ms1_filtered_df["mz_defect"] < massdefect_max)
607617
]
608618

massql/msql_parser.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,13 @@ def qualifier(self, items):
146146
qualifier_dict[qualifier_type] = {}
147147
qualifier_dict[qualifier_type]["name"] = qualifier_type
148148

149+
# Capture the comparator operator (equal, greaterthan, lessthan)
150+
operator_tree = items[1]
151+
if hasattr(operator_tree, 'data'):
152+
qualifier_dict[qualifier_type]["comparator"] = operator_tree.data
153+
else:
154+
qualifier_dict[qualifier_type]["comparator"] = "equal"
155+
149156
if qualifier_type == "qualifierppmtolerance":
150157
qualifier_dict[qualifier_type]["unit"] = "ppm"
151158
if qualifier_type == "qualifiermztolerance":

0 commit comments

Comments
 (0)