-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfunctional_group.py
More file actions
64 lines (58 loc) · 2.9 KB
/
functional_group.py
File metadata and controls
64 lines (58 loc) · 2.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from typing import List, Optional

from rdkit import Chem
from rdkit import RDLogger
# SMARTS pattern for each functional group, keyed by display name.
# Insertion order matters: get_functional_groups() emits one flag per entry,
# in this dict's iteration order.
# NOTE(review): in the 'Aldehyde' and 'Thial' patterns, the `H` inside
# `[#6,H]` is presumably read as a hydrogen-count primitive (an atom with one
# attached H) rather than a hydrogen atom -- confirm this is intended.
_FUNCTIONAL_GROUP_SMARTS = {
    'Acid anhydride': '[CX3](=[OX1])[OX2][CX3](=[OX1])',
    'Acyl halide': '[CX3](=[OX1])[F,Cl,Br,I]',
    'Alcohol': '[#6][OX2H]',
    'Aldehyde': '[CX3H1](=O)[#6,H]',
    'Alkane': '[CX4;H3,H2]',
    'Alkene': '[CX3]=[CX3]',
    'Alkyne': '[CX2]#[CX2]',
    'Amide': '[NX3][CX3](=[OX1])[#6]',
    'Amine': '[NX3;H2,H1,H0;!$(NC=O)]',
    'Arene': '[cX3]1[cX3][cX3][cX3][cX3][cX3]1',
    'Azo compound': '[#6][NX2]=[NX2][#6]',
    'Carbamate': '[NX3][CX3](=[OX1])[OX2H0]',
    'Carboxylic acid': '[CX3](=O)[OX2H]',
    'Enamine': '[NX3][CX3]=[CX3]',
    'Enol': '[OX2H][#6X3]=[#6]',
    'Ester': '[#6][CX3](=O)[OX2H0][#6]',
    'Ether': '[OD2]([#6])[#6]',
    'Haloalkane': '[#6][F,Cl,Br,I]',
    'Hydrazine': '[NX3][NX3]',
    'Hydrazone': '[NX3][NX2]=[#6]',
    'Imide': '[CX3](=[OX1])[NX3][CX3](=[OX1])',
    'Imine': '[$([CX3]([#6])[#6]),$([CX3H][#6])]=[$([NX2][#6]),$([NX2H])]',
    'Isocyanate': '[NX2]=[C]=[O]',
    'Isothiocyanate': '[NX2]=[C]=[S]',
    'Ketone': '[#6][CX3](=O)[#6]',
    'Nitrile': '[NX1]#[CX2]',
    'Phenol': '[OX2H][cX3]:[c]',
    'Phosphine': '[PX3]',
    'Sulfide': '[#16X2H0]',
    'Sulfonamide': '[#16X4]([NX3])(=[OX1])(=[OX1])[#6]',
    'Sulfonate': '[#16X4](=[OX1])(=[OX1])([#6])[OX2H0]',
    'Sulfone': '[#16X4](=[OX1])(=[OX1])([#6])[#6]',
    'Sulfonic acid': '[#16X4](=[OX1])(=[OX1])([#6])[OX2H]',
    'Sulfoxide': '[#16X3]=[OX1]',
    'Thial': '[CX3H1](=S)[#6,H]',
    'Thioamide': '[NX3][CX3]=[SX1]',
    'Thiol': '[#16X2H]',
}

# Compiled query molecules, built once at import time: name -> Chem.Mol.
functional_groups = {
    group_name: Chem.MolFromSmarts(pattern)
    for group_name, pattern in _FUNCTIONAL_GROUP_SMARTS.items()
}
def match_group(mol: Chem.Mol, func_group) -> int:
    """Return 1 if ``func_group`` is present in ``mol``, otherwise 0.

    Args:
        mol: Molecule to inspect.
        func_group: Either an RDKit query molecule (for example one built
            with ``Chem.MolFromSmarts``) or a callable that takes ``mol``
            and returns a count; a result equal to 0 means "absent".

    Returns:
        ``1`` when the group is found, ``0`` when it is not.
    """
    # isinstance rather than an exact type comparison, so Chem.Mol
    # subclasses are treated as substructure queries instead of being called.
    if isinstance(func_group, Chem.Mol):
        # Only presence matters, so stop at the first hit instead of
        # enumerating every match.
        return 1 if mol.HasSubstructMatch(func_group) else 0
    return 0 if func_group(mol) == 0 else 1
def get_functional_groups(smiles: str) -> Optional[List[int]]:
    """Encode which known functional groups occur in a SMILES string.

    Args:
        smiles: SMILES string. Leading/trailing whitespace and embedded
            spaces are removed before parsing.

    Returns:
        A list of 0/1 flags, one per entry of ``functional_groups`` in that
        dict's iteration order, or ``None`` when RDKit cannot parse the
        cleaned SMILES.
    """
    # Turn off RDKit's 'rdApp.*' log output before parsing.
    RDLogger.DisableLog('rdApp.*')
    mol = Chem.MolFromSmiles(smiles.strip().replace(' ', ''))
    if mol is None:
        return None
    # Only the compiled patterns are needed; the group names are implied by
    # the position of each flag.
    return [match_group(mol, pattern) for pattern in functional_groups.values()]
# Example usage: data.smiles.map(get_functional_groups)