-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfingerprints.py
More file actions
21 lines (19 loc) · 872 Bytes
/
fingerprints.py
File metadata and controls
21 lines (19 loc) · 872 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import sys
import os
import numpy as np
from rdkit import Chem
from rdkit.Chem import rdFingerprintGenerator
def morgan_fp(sdf_file: str, out_npy: str, nbits: int = 2048, radius: int = 3) -> None:
    """Compute Morgan bit fingerprints for an SDF file and save them with NumPy.

    Each molecule read from ``sdf_file`` is turned into a bit-vector Morgan
    fingerprint; the fingerprints are stacked row-wise into one ``uint8``
    array and written to ``out_npy`` with ``np.save``.

    Args:
        sdf_file: Path of the input SDF file.
        out_npy: Destination passed to ``np.save`` (NumPy appends ``.npy``
            when the name does not already end with it).
        nbits: Fingerprint length, forwarded as ``fpSize``.
        radius: Morgan radius, forwarded as ``radius``.

    Entries for which the supplier yields ``None`` are skipped, so the saved
    array may have fewer rows than the SDF has records. If nothing usable is
    read, an empty array of shape ``(0, nbits)`` is saved so downstream code
    still sees a 2-D array.
    """
    mfpgen = rdFingerprintGenerator.GetMorganGenerator(radius=radius, fpSize=nbits)
    suppl = Chem.SDMolSupplier(sdf_file)
    # Ask the generator for NumPy rows directly rather than letting np.array()
    # walk every ExplicitBitVect bit by bit through the sequence protocol.
    fps = [mfpgen.GetFingerprintAsNumPy(m) for m in suppl if m is not None]
    if fps:
        # Only {0, 1} occur, so uint8 is sufficient; copy=False avoids a
        # second copy when the stacked rows are already uint8.
        npfps = np.stack(fps).astype(np.uint8, copy=False)
    else:
        # np.array([]) would give a 1-D float array; keep the 2-D uint8 layout.
        npfps = np.zeros((0, nbits), dtype=np.uint8)
    np.save(out_npy, npfps)
if __name__ == '__main__':
    # Command-line entry point: expects exactly four arguments after the
    # script name (input SDF, output npy, Morgan radius, fingerprint bits).
    if len(sys.argv) != 5:
        # sys.exit() with a string writes it to stderr and exits with status 1,
        # so a wrong argument count stops here instead of falling through to
        # the sys.argv indexing below.
        sys.exit(f'Usage: {sys.argv[0]} <input sdf file> <output npy file> <Morgan FP radius> <Morgan FP bits>')
    sdf_path = sys.argv[1]
    npy = sys.argv[2]
    # Note the CLI order is radius then bits, the reverse of morgan_fp's
    # keyword defaults; both are passed by keyword to avoid mix-ups.
    radius = int(sys.argv[3])
    nbits = int(sys.argv[4])
    morgan_fp(sdf_path, npy, nbits=nbits, radius=radius)