111111 '02X' : '02X' , '03X' : '03X' , '04X' : '04X' , '12X' : '12X' , '13X' : '13X' , '14X' : '14X' , '15X' : '15X' , '24X' : '24X' , '35X' : '35X' }
112112A_cross_rings = {c for c in cut_type_dict if c [- 1 ] == 'A' }
113113X_cross_rings = {c for c in cut_type_dict if c [- 1 ] == 'X' }
114- ranks = ['Alpha' , 'Beta' , 'Gamma' , 'Delta' , 'Epsilon' , 'Zeta' , 'Eta' ]
114+ ranks = ['Alpha' , 'Beta' , 'Gamma' , 'Delta' , 'Epsilon' , 'Zeta' , 'Eta' , 'Theta' , 'Iota' , 'Kappa' , 'Lambda' , 'Mu' ]
115115
116116AA_masses = {'A' :71.0371 ,'R' :156.1011 ,'N' :114.0429 ,'D' :115.0269 ,
117117'C' :103.0091 ,'E' :129.0425 ,'Q' :128.0585 ,'G' :57.0214 ,'H' :137.0589 ,
121121mono_attributes = mono_attributes | tester_ma_addition
122122bond_masses = {'red_bond' :18.010 ,'no_bond' :- 18.010 ,'peptide_b' :- 18.010 ,'peptide_c' :- 1 ,'peptide_z' :- 16 ,'peptide_a' :- 46 }
123123
def _get_permethylated_attributes(attributes):
    """Creates a permethylated deep copy of a mono_attributes-style dict\n
    | Arguments:
    | :-
    | attributes (dict): mapping of monosaccharide key to its attribute dict; an attribute dict may hold a 'mass' dict keyed by label
    |
    | Returns:
    | :-
    | Returns a deep copy of attributes in which, for every key whose basic type has known methylation sites,
    | the entry mass[key] is increased by 14.01565 per site; the input dict is left unmodified
    """
    methyl_mass = 14.01565  # Mass of CH3 - H, i.e. the net gain per methylated site

    # Number of reactive sites (OH, NH, COOH) for permethylation, per basic type.
    # The sulfated/phosphorylated entries are already reduced by one site.
    methylation_sites = {
        'Hex': 4, 'dHex': 3, 'Pen': 3,
        'HexNAc': 4,    # 3 OH + 1 NH
        'HexA': 4,      # 3 OH + 1 COOH
        'Neu5Ac': 6,    # 3 OH + 1 NH + 1 COOH + 1 OH on glycerol tail
        'Neu5Gc': 6,    # 4 OH + 1 NH + 1 COOH
        'Kdn': 5,       # 4 OH + 1 COOH
        'HexS': 3,      # Assuming S replaces an OH
        'HexNAcOS': 3,  # Assuming S replaces an OH
        'HexP': 3,      # Assuming P replaces an OH
    }

    permethylated_attr = copy.deepcopy(attributes)
    for mono_key, mono_vals in permethylated_attr.items():
        # Use map_to_basic to identify the core monosaccharide type
        base_type = map_to_basic(mono_key, obfuscate_ptm=False)
        sites = methylation_sites.get(base_type, 0)
        if not sites:
            continue

        # Look for sulfate/phosphate markers only in the part of the key that the
        # base type does not cover. Scanning the whole key would read the 'P' of
        # 'Pen' as a phosphate and would subtract a second site from keys such as
        # 'HexS'/'HexP', whose table entry already accounts for the replaced OH.
        if base_type in mono_key:
            extra = mono_key.replace(base_type, '', 1)
        else:
            extra = mono_key
        if 'S' in extra:
            sites -= 1
        if 'P' in extra:
            sites -= 1

        # Apply the mass increase to the full monosaccharide mass only.
        # NOTE(review): other labels stored in the same 'mass' dict are not adjusted
        # here - confirm this is intended for fragment-specific masses.
        masses = mono_vals.get('mass', {})
        if sites > 0 and mono_key in masses:
            masses[mono_key] += sites * methyl_mass

    return permethylated_attr
161+
162+
124163def evaluate_adjacency_monos (glycan_part , adjustment ):
125164 """Modified version of evaluate_adjacency to check glycoletter adjacency for monosaccharide only strings\n
126165 | Arguments:
@@ -371,7 +410,7 @@ def atom_mods_init(subg, present_breakages, terminals, terminal_labels):
371410 return atomic_mod_dict
372411
373412
374- def get_mono_mods_list (root_node , subg , terminals , terminal_labels , nx_edge_dict , allowed_X_cleavages ):
413+ def get_mono_mods_list (root_node , subg , terminals , terminal_labels , nx_edge_dict , allowed_X_cleavages , mono_attributes_in = mono_attributes ):
375414 """Determines all possible cross-ring modifications for each node label in terminals\n
376415 | Arguments:
377416 | :-
@@ -394,7 +433,7 @@ def get_mono_mods_list(root_node, subg, terminals, terminal_labels, nx_edge_dict
394433 elif subg .degree ()[node ] > 1 :
395434 terminal_mods .append ([label ])
396435 else :
397- terminal_mods .append ([x for x in mono_attributes [basic_label ]['mass' ] if x in allowed_X_cleavages or x == basic_label ])
436+ terminal_mods .append ([x for x in mono_attributes_in [basic_label ]['mass' ] if x in allowed_X_cleavages or x == basic_label ])
398437 return terminal_mods
399438
400439
@@ -464,7 +503,7 @@ def generate_mod_permutations(terminals, terminal_labels, mono_mods_list, atomic
464503 return all_mono_mods , all_terminal_perms
465504
466505
467- def precalculate_mod_masses (all_mono_mods , all_terminal_perms , terminal_labels , global_mods ):
506+ def precalculate_mod_masses (all_mono_mods , all_terminal_perms , terminal_labels , global_mods , mono_attributes_in = mono_attributes ):
468507 """Determines the masses of all possible monosaccharide modifications and their respective atom level representations\n
469508 | Arguments:
470509 | :-
@@ -478,7 +517,7 @@ def precalculate_mod_masses(all_mono_mods, all_terminal_perms, terminal_labels,
478517 | (2) a list of all possible mass combinations for each bond fragmentation combination
479518 | (3) a list of masses corresponding to each of the global mods
480519 """
481- all_mono_mod_masses = [[mono_attributes [map_to_basic (label , obfuscate_ptm = False )]['mass' ][map_to_basic (mod , obfuscate_ptm = False )] for mod in mods ] for mods , label in zip (all_mono_mods , terminal_labels )]
520+ all_mono_mod_masses = [[mono_attributes_in [map_to_basic (label , obfuscate_ptm = False )]['mass' ][map_to_basic (mod , obfuscate_ptm = False )] for mod in mods ] for mods , label in zip (all_mono_mods , terminal_labels )]
482521
483522 all_atom_dict_masses = []
484523 for node in all_terminal_perms :
@@ -488,7 +527,7 @@ def precalculate_mod_masses(all_mono_mods, all_terminal_perms, terminal_labels,
488527 node_dict_masses .append (sum (present_atom_mods ))
489528 all_atom_dict_masses .append (node_dict_masses )
490529
491- global_mods_mass = [mono_attributes ['Global' ]['mass' ][x ] for x in global_mods [1 :]]
530+ global_mods_mass = [mono_attributes_in ['Global' ]['mass' ][x ] for x in global_mods [1 :]]
492531
493532 return product (* all_mono_mod_masses ), product (* all_atom_dict_masses ), global_mods_mass
494533
@@ -635,7 +674,7 @@ def annotate_subgraph(subg,node_mod,global_mod,terminals):
635674 return mod_subg
636675
637676def generate_atomic_frags (nx_mono , global_mods , special_residues , allowed_X_cleavages , max_cleavages = 3 , fragment_masses = [],
638- threshold = 0.5 , mass_tag = None , charge = - 1 ):
677+ threshold = 0.5 , mass_tag = None , charge = - 1 , permethylated = False ):
639678 """Calculates the graph and mass of all possible fragments of the input\n
640679 | Arguments:
641680 | :-
@@ -652,7 +691,8 @@ def generate_atomic_frags(nx_mono, global_mods, special_residues, allowed_X_clea
652691 | :-
653692 | Returns a dict of lists of networkx subgraphs
654693 """
655- if not mass_tag :
694+ effective_mono_attributes = _get_permethylated_attributes (mono_attributes ) if permethylated else mono_attributes
695+ if mass_tag is None :
656696 mass_tag = 2.0156
657697 charge_masses = np .array (extend_masses (fragment_masses , charge ))
658698 threshold = abs (threshold )
@@ -663,8 +703,8 @@ def generate_atomic_frags(nx_mono, global_mods, special_residues, allowed_X_clea
663703 node_dict_basic = {k : map_to_basic (v , obfuscate_ptm = False ) for k , v in node_dict .items ()}
664704 subgraph_fragments = {}
665705 subgraphs = enumerate_subgraphs (nx_mono ) + [nx_mono ]
666- max_global_mass = max (mono_attributes ['Global' ]['mass' ].values ())
667- min_global_mass = min (mono_attributes ['Global' ]['mass' ].values ())
706+ max_global_mass = max (effective_mono_attributes ['Global' ]['mass' ].values ())
707+ min_global_mass = min (effective_mono_attributes ['Global' ]['mass' ].values ())
668708 nx_deg = nx_mono .degree
669709 for i ,subg in enumerate (subgraphs ):
670710 terminals = get_terminals (nx_deg ,subg )
@@ -673,8 +713,10 @@ def generate_atomic_frags(nx_mono, global_mods, special_residues, allowed_X_clea
673713 continue
674714 other_terminals = [x for x in subg .nodes if x in all_other_terminals and x not in terminals ]
675715 terminals = terminals + other_terminals
676- inner_mass = sum ([mono_attributes [node_dict_basic [m ]]['mass' ][node_dict_basic [m ]] for m in subg .nodes () if m not in terminals ])
677- max_graph_mass = inner_mass + sum ([mono_attributes [node_dict_basic [m ]]['mass' ][node_dict_basic [m ]] for m in terminals ]) + 18.0105546 * len (terminals )
716+ inner_mass = sum ([effective_mono_attributes [node_dict_basic [m ]]['mass' ][node_dict_basic [m ]] for m in subg .nodes () if m not in terminals ])
717+ if permethylated :
718+ inner_mass -= subg .number_of_edges ()* 14.01565
719+ max_graph_mass = inner_mass + sum ([effective_mono_attributes [node_dict_basic [m ]]['mass' ][node_dict_basic [m ]] for m in terminals ]) + 18.0105546 * len (terminals )
678720 max_graph_mass += max_global_mass
679721 min_graph_mass = inner_mass + min_global_mass
680722 avg_graph_mass = (min_graph_mass + max_graph_mass )/ 2
@@ -690,10 +732,12 @@ def generate_atomic_frags(nx_mono, global_mods, special_residues, allowed_X_clea
690732 present_breakages = get_broken_bonds (subg , nx_mono , nx_edge_dict )
691733 root_node = [v for v , d in subg .out_degree () if d == 0 ][0 ]
692734 atomic_mod_dict_subg = atom_mods_init (subg , present_breakages , terminals , terminal_labels )
693- mono_mods_list = get_mono_mods_list (root_node , subg , terminals , terminal_labels , nx_edge_dict , allowed_X_cleavages )
735+ mono_mods_list = get_mono_mods_list (root_node , subg , terminals , terminal_labels , nx_edge_dict , allowed_X_cleavages , mono_attributes_in = effective_mono_attributes )
694736 mono_mod_perms , atom_dict_perms = generate_mod_permutations (terminals , terminal_labels , mono_mods_list , atomic_mod_dict_subg )
695- mono_masses , atom_masses , global_masses = precalculate_mod_masses (mono_mod_perms , atom_dict_perms , terminal_labels , subg_global_mods )
737+ mono_masses , atom_masses , global_masses = precalculate_mod_masses (mono_mod_perms , atom_dict_perms , terminal_labels , subg_global_mods , mono_attributes_in = effective_mono_attributes )
696738 initial_masses = np .array (preliminary_calculate_mass (mono_masses , atom_masses , global_masses , terminals , inner_mass , bonus_root_mass , bonus_root_node , mass_tag , charge , mono_mod_perms ))
739+ if permethylated :
740+ initial_masses = initial_masses - subg .number_of_edges ()* 14.01565
697741 valid_idx = np .where (check_masses (charge_masses , initial_masses , threshold ))[0 ]
698742 if valid_idx .size == 0 :
699743 continue
@@ -1269,7 +1313,7 @@ def glycopeptide_string_to_input(gpep_string):
12691313@rescue_glycans
12701314def CandyCrumbs (input_string , fragment_masses , mass_threshold ,
12711315 max_cleavages = 3 , simplify = True , charge = - 1 , mass_tag = None ,
1272- iupac = False , intensities = None , disable_global_mods = False , disable_X_cross_rings = False ):
1316+ iupac = False , intensities = None , disable_global_mods = False , disable_X_cross_rings = False , permethylated = False ):
12731317 """Basic wrapper for the annotation of observed masses with correct nomenclature given a glycan\n
12741318 | Arguments:
12751319 | :-
@@ -1291,7 +1335,7 @@ def CandyCrumbs(input_string, fragment_masses, mass_threshold,
12911335 nx_mono ,pep_gr = input_to_graph (input_dict )
12921336 global_mods ,special_residues = get_initial_global_mods (nx_mono , charge ,disable_global_mods = disable_global_mods )
12931337 allowed_X_cleavages = [] if disable_X_cross_rings else X_cross_rings
1294- subg_frags = generate_atomic_frags (nx_mono , global_mods , special_residues , allowed_X_cleavages ,max_cleavages = max_cleavages , fragment_masses = fragment_masses , threshold = mass_threshold , mass_tag = mass_tag , charge = charge )
1338+ subg_frags = generate_atomic_frags (nx_mono , global_mods , special_residues , allowed_X_cleavages ,max_cleavages = max_cleavages , fragment_masses = fragment_masses , threshold = mass_threshold , mass_tag = mass_tag , charge = charge , permethylated = permethylated )
12951339 downstream_values = []
12961340 if input_dict ['peptide' ]:
12971341 peptide = True
0 commit comments