Skip to content

Commit 5f2a2f0

Browse files
committed
WIP: permethylation mass adjustments
1 parent ee8e1a8 commit 5f2a2f0

2 files changed

Lines changed: 61 additions & 17 deletions

File tree

candycrunch/analysis.py

Lines changed: 60 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@
111111
'02X': '02X', '03X': '03X', '04X': '04X', '12X': '12X', '13X': '13X', '14X': '14X', '15X': '15X', '24X': '24X', '35X': '35X'}
112112
A_cross_rings = {c for c in cut_type_dict if c[-1] == 'A'}
113113
X_cross_rings = {c for c in cut_type_dict if c[-1] == 'X'}
114-
ranks = ['Alpha', 'Beta', 'Gamma', 'Delta', 'Epsilon', 'Zeta', 'Eta']
114+
ranks = ['Alpha', 'Beta', 'Gamma', 'Delta', 'Epsilon', 'Zeta', 'Eta', 'Theta', 'Iota','Kappa', 'Lambda','Mu']
115115

116116
AA_masses = {'A':71.0371,'R':156.1011,'N':114.0429,'D':115.0269,
117117
'C':103.0091,'E':129.0425,'Q':128.0585,'G':57.0214,'H':137.0589,
@@ -121,6 +121,45 @@
121121
mono_attributes = mono_attributes|tester_ma_addition
122122
bond_masses = {'red_bond':18.010,'no_bond':-18.010,'peptide_b':-18.010,'peptide_c':-1,'peptide_z':-16,'peptide_a':-46}
123123

124+
def _get_permethylated_attributes(attributes):
125+
"""Creates a modified copy of mono_attributes for permethylation."""
126+
permethylated_attr = copy.deepcopy(attributes)
127+
methyl_mass = 14.01565 # Mass of CH3 - H
128+
129+
# Number of reactive sites (OH, NH, COOH) for permethylation
130+
methylation_sites = {
131+
'Hex': 4, 'dHex': 3, 'Pen': 3,
132+
'HexNAc': 4, # 3 OH + 1 NH
133+
'HexA': 4, # 3 OH + 1 COOH
134+
'Neu5Ac': 6, # 3 OH + 1 NH + 1 COOH + 1 OH on glycerol tail
135+
'Neu5Gc': 6, # 4 OH + 1 NH + 1 COOH
136+
'Kdn': 5, # 4 OH + 1 COOH
137+
'HexS': 3, # Assuming S replaces an OH
138+
'HexNAcOS': 3, # Assuming S replaces an OH
139+
'HexP': 3 # Assuming P replaces an OH
140+
}
141+
142+
for mono_key, mono_vals in permethylated_attr.items():
143+
# Use map_to_basic to identify the core monosaccharide type
144+
base_type = map_to_basic(mono_key, obfuscate_ptm=False)
145+
sites = methylation_sites.get(base_type, 0)
146+
147+
if not sites:
148+
continue
149+
150+
# Adjust site count for modifications that replace a hydroxyl group
151+
if 'S' in mono_key and 'OS' not in mono_key:
152+
sites -= 1
153+
if 'P' in mono_key:
154+
sites -= 1
155+
156+
# Apply the mass increase to the full monosaccharide mass
157+
if sites > 0 and mono_key in mono_vals.get('mass', {}):
158+
mono_vals['mass'][mono_key] += sites * methyl_mass
159+
160+
return permethylated_attr
161+
162+
124163
def evaluate_adjacency_monos(glycan_part, adjustment):
125164
"""Modified version of evaluate_adjacency to check glycoletter adjacency for monosaccharide only strings\n
126165
| Arguments:
@@ -371,7 +410,7 @@ def atom_mods_init(subg, present_breakages, terminals, terminal_labels):
371410
return atomic_mod_dict
372411

373412

374-
def get_mono_mods_list(root_node, subg, terminals, terminal_labels, nx_edge_dict, allowed_X_cleavages):
413+
def get_mono_mods_list(root_node, subg, terminals, terminal_labels, nx_edge_dict, allowed_X_cleavages,mono_attributes_in =mono_attributes):
375414
"""Determines all possible cross-ring modifications for each node label in terminals\n
376415
| Arguments:
377416
| :-
@@ -394,7 +433,7 @@ def get_mono_mods_list(root_node, subg, terminals, terminal_labels, nx_edge_dict
394433
elif subg.degree()[node] > 1:
395434
terminal_mods.append([label])
396435
else:
397-
terminal_mods.append([x for x in mono_attributes[basic_label]['mass'] if x in allowed_X_cleavages or x == basic_label])
436+
terminal_mods.append([x for x in mono_attributes_in[basic_label]['mass'] if x in allowed_X_cleavages or x == basic_label])
398437
return terminal_mods
399438

400439

@@ -464,7 +503,7 @@ def generate_mod_permutations(terminals, terminal_labels, mono_mods_list, atomic
464503
return all_mono_mods, all_terminal_perms
465504

466505

467-
def precalculate_mod_masses(all_mono_mods, all_terminal_perms, terminal_labels, global_mods):
506+
def precalculate_mod_masses(all_mono_mods, all_terminal_perms, terminal_labels, global_mods,mono_attributes_in= mono_attributes):
468507
"""Determines the masses of all possible monosaccharide modifications and their respective atom level representations\n
469508
| Arguments:
470509
| :-
@@ -478,7 +517,7 @@ def precalculate_mod_masses(all_mono_mods, all_terminal_perms, terminal_labels,
478517
| (2) a list of all possible mass combinations for each bond fragmentation combination
479518
| (3) a list of masses corresponding to each of the global mods
480519
"""
481-
all_mono_mod_masses = [[mono_attributes[map_to_basic(label, obfuscate_ptm = False)]['mass'][map_to_basic(mod, obfuscate_ptm = False)] for mod in mods] for mods, label in zip(all_mono_mods, terminal_labels)]
520+
all_mono_mod_masses = [[mono_attributes_in[map_to_basic(label, obfuscate_ptm = False)]['mass'][map_to_basic(mod, obfuscate_ptm = False)] for mod in mods] for mods, label in zip(all_mono_mods, terminal_labels)]
482521

483522
all_atom_dict_masses = []
484523
for node in all_terminal_perms:
@@ -488,7 +527,7 @@ def precalculate_mod_masses(all_mono_mods, all_terminal_perms, terminal_labels,
488527
node_dict_masses.append(sum(present_atom_mods))
489528
all_atom_dict_masses.append(node_dict_masses)
490529

491-
global_mods_mass = [mono_attributes['Global']['mass'][x] for x in global_mods[1:]]
530+
global_mods_mass = [mono_attributes_in['Global']['mass'][x] for x in global_mods[1:]]
492531

493532
return product(*all_mono_mod_masses), product(*all_atom_dict_masses), global_mods_mass
494533

@@ -635,7 +674,7 @@ def annotate_subgraph(subg,node_mod,global_mod,terminals):
635674
return mod_subg
636675

637676
def generate_atomic_frags(nx_mono, global_mods, special_residues, allowed_X_cleavages, max_cleavages = 3, fragment_masses = [],
638-
threshold = 0.5, mass_tag = None, charge = -1):
677+
threshold = 0.5, mass_tag = None, charge = -1,permethylated=False):
639678
"""Calculates the graph and mass of all possible fragments of the input\n
640679
| Arguments:
641680
| :-
@@ -652,7 +691,8 @@ def generate_atomic_frags(nx_mono, global_mods, special_residues, allowed_X_clea
652691
| :-
653692
| Returns a dict of lists of networkx subgraphs
654693
"""
655-
if not mass_tag:
694+
effective_mono_attributes = _get_permethylated_attributes(mono_attributes) if permethylated else mono_attributes
695+
if mass_tag is None:
656696
mass_tag = 2.0156
657697
charge_masses = np.array(extend_masses(fragment_masses, charge))
658698
threshold = abs(threshold)
@@ -663,8 +703,8 @@ def generate_atomic_frags(nx_mono, global_mods, special_residues, allowed_X_clea
663703
node_dict_basic = {k: map_to_basic(v, obfuscate_ptm = False) for k, v in node_dict.items()}
664704
subgraph_fragments = {}
665705
subgraphs = enumerate_subgraphs(nx_mono) + [nx_mono]
666-
max_global_mass = max(mono_attributes['Global']['mass'].values())
667-
min_global_mass = min(mono_attributes['Global']['mass'].values())
706+
max_global_mass = max(effective_mono_attributes['Global']['mass'].values())
707+
min_global_mass = min(effective_mono_attributes['Global']['mass'].values())
668708
nx_deg = nx_mono.degree
669709
for i,subg in enumerate(subgraphs):
670710
terminals = get_terminals(nx_deg,subg)
@@ -673,8 +713,10 @@ def generate_atomic_frags(nx_mono, global_mods, special_residues, allowed_X_clea
673713
continue
674714
other_terminals = [x for x in subg.nodes if x in all_other_terminals and x not in terminals]
675715
terminals = terminals+other_terminals
676-
inner_mass = sum([mono_attributes[node_dict_basic[m]]['mass'][node_dict_basic[m]] for m in subg.nodes() if m not in terminals])
677-
max_graph_mass = inner_mass + sum([mono_attributes[node_dict_basic[m]]['mass'][node_dict_basic[m]] for m in terminals]) + 18.0105546*len(terminals)
716+
inner_mass = sum([effective_mono_attributes[node_dict_basic[m]]['mass'][node_dict_basic[m]] for m in subg.nodes() if m not in terminals])
717+
if permethylated:
718+
inner_mass -= subg.number_of_edges()*14.01565
719+
max_graph_mass = inner_mass + sum([effective_mono_attributes[node_dict_basic[m]]['mass'][node_dict_basic[m]] for m in terminals]) + 18.0105546*len(terminals)
678720
max_graph_mass += max_global_mass
679721
min_graph_mass = inner_mass + min_global_mass
680722
avg_graph_mass = (min_graph_mass+max_graph_mass)/2
@@ -690,10 +732,12 @@ def generate_atomic_frags(nx_mono, global_mods, special_residues, allowed_X_clea
690732
present_breakages = get_broken_bonds(subg, nx_mono, nx_edge_dict)
691733
root_node = [v for v, d in subg.out_degree() if d == 0][0]
692734
atomic_mod_dict_subg = atom_mods_init(subg, present_breakages, terminals, terminal_labels)
693-
mono_mods_list = get_mono_mods_list(root_node, subg, terminals, terminal_labels, nx_edge_dict, allowed_X_cleavages)
735+
mono_mods_list = get_mono_mods_list(root_node, subg, terminals, terminal_labels, nx_edge_dict, allowed_X_cleavages,mono_attributes_in=effective_mono_attributes)
694736
mono_mod_perms, atom_dict_perms = generate_mod_permutations(terminals, terminal_labels, mono_mods_list, atomic_mod_dict_subg)
695-
mono_masses, atom_masses, global_masses = precalculate_mod_masses(mono_mod_perms, atom_dict_perms, terminal_labels, subg_global_mods)
737+
mono_masses, atom_masses, global_masses = precalculate_mod_masses(mono_mod_perms, atom_dict_perms, terminal_labels, subg_global_mods,mono_attributes_in=effective_mono_attributes)
696738
initial_masses = np.array(preliminary_calculate_mass(mono_masses, atom_masses, global_masses, terminals, inner_mass, bonus_root_mass, bonus_root_node, mass_tag, charge, mono_mod_perms))
739+
if permethylated:
740+
initial_masses = initial_masses - subg.number_of_edges()*14.01565
697741
valid_idx = np.where(check_masses(charge_masses, initial_masses, threshold))[0]
698742
if valid_idx.size == 0:
699743
continue
@@ -1269,7 +1313,7 @@ def glycopeptide_string_to_input(gpep_string):
12691313
@rescue_glycans
12701314
def CandyCrumbs(input_string, fragment_masses, mass_threshold,
12711315
max_cleavages = 3, simplify = True, charge = -1, mass_tag = None,
1272-
iupac = False, intensities = None, disable_global_mods=False, disable_X_cross_rings=False):
1316+
iupac = False, intensities = None, disable_global_mods=False, disable_X_cross_rings=False,permethylated=False):
12731317
"""Basic wrapper for the annotation of observed masses with correct nomenclature given a glycan\n
12741318
| Arguments:
12751319
| :-
@@ -1291,7 +1335,7 @@ def CandyCrumbs(input_string, fragment_masses, mass_threshold,
12911335
nx_mono,pep_gr = input_to_graph(input_dict)
12921336
global_mods,special_residues = get_initial_global_mods(nx_mono, charge,disable_global_mods = disable_global_mods)
12931337
allowed_X_cleavages = [] if disable_X_cross_rings else X_cross_rings
1294-
subg_frags = generate_atomic_frags(nx_mono, global_mods, special_residues, allowed_X_cleavages,max_cleavages = max_cleavages, fragment_masses = fragment_masses, threshold = mass_threshold, mass_tag = mass_tag, charge = charge)
1338+
subg_frags = generate_atomic_frags(nx_mono, global_mods, special_residues, allowed_X_cleavages,max_cleavages = max_cleavages, fragment_masses = fragment_masses, threshold = mass_threshold, mass_tag = mass_tag, charge = charge,permethylated=permethylated)
12951339
downstream_values = []
12961340
if input_dict['peptide']:
12971341
peptide=True

tests/test_CandyCrunch.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from tabulate import tabulate
77
import numpy as np
88
from collections import defaultdict
9-
9+
print('started')
1010
from candycrunch.prediction import *
1111
from glycowork.motif.graph import compare_glycans,get_possible_topologies,graph_to_string
1212
from itertools import product

0 commit comments

Comments
 (0)