-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata.py
More file actions
128 lines (104 loc) · 4.4 KB
/
data.py
File metadata and controls
128 lines (104 loc) · 4.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 17 11:01:01 2022
@author: huzongxiang
"""
from pathlib import Path
from multiprocessing import Pool
import pickle as pkl
from Bio.PDB.PDBParser import PDBParser
# Maps three-letter residue names to integer indices 0-19 (the index is the
# position of the name in the tuple below). pdb2respos uses membership in this
# mapping to decide whether a residue is stored as a chain residue.
residue_codes = {
    name: index
    for index, name in enumerate((
        'ALA', 'CYS', 'ASP', 'GLU',
        'PHE', 'GLY', 'HIS', 'LYS',
        'ILE', 'LEU', 'MET', 'ASN',
        'PRO', 'GLN', 'ARG', 'SER',
        'THR', 'VAL', 'TYR', 'TRP',
    ))
}
# Module-level Biopython PDB parser; pdb2respos calls its get_structure()
# for every file it processes.
# NOTE(review): PERMISSIVE=1 presumably makes the parser tolerate malformed
# records instead of raising -- confirm against the Biopython documentation.
parser = PDBParser(PERMISSIVE=1)
def pdb_files(pdb_path):
    """Return the sorted ``*.pdb`` entries found directly inside *pdb_path*.

    Parameters
    ----------
    pdb_path : pathlib.Path
        Location to search; only a ``Path`` instance is accepted.

    Returns
    -------
    list of pathlib.Path
        Matches of the ``*.pdb`` glob pattern, in sorted order.

    Raises
    ------
    TypeError
        If *pdb_path* is not a ``Path`` instance.
    IOError
        If *pdb_path* does not exist.
    """
    if isinstance(pdb_path, Path):
        if pdb_path.exists():
            # The glob is not recursive: only direct children are matched.
            return sorted(pdb_path.glob('*.pdb'))
        raise IOError("path not exists")
    raise TypeError("path should be Path import from pathlib like Path(your_path)")
# Atom names kept for chain and disorder entries.
_BACKBONE_ATOMS = ('CA', 'C', 'N', 'O')


def _atom_coords(residue, backbone_only=True):
    """Return a new ``{atom name: [x, y, z]}`` dict for the atoms of *residue*.

    With ``backbone_only`` set, only atoms named in ``_BACKBONE_ATOMS`` are
    kept; otherwise every atom of the residue is recorded.
    """
    return {
        atom.name: atom.get_coord().tolist()
        for atom in residue
        if not backbone_only or atom.name in _BACKBONE_ATOMS
    }


def pdb2respos(pdb_file):
    """Parse one PDB file into per-chain dictionaries of atom coordinates.

    Every residue is stored under the key ``"<resname>_<sequence number>"``
    and sorted into one of three per-chain dictionaries:

    * ``residues`` -- residues with a blank insertion code, a blank hetero
      flag and a name listed in ``residue_codes``; residues with insertion
      code ``"A"`` whose sequence number differs from the previous residue of
      the same chain; and the first alternative of a disordered residue.
      Only CA, C, N and O coordinates are kept.
    * ``disorder_residues`` -- every other residue carrying an insertion code
      (key suffixed with that code) and the remaining alternatives of a
      disordered residue (backbone atoms only). Alternatives share one key,
      so with more than two alternatives only the last one is retained.
    * ``hetatm`` -- residues with a blank insertion code that are not
      recognised as chain residues; all of their atoms are kept.

    Parameters
    ----------
    pdb_file : pathlib.Path
        Path of the file to parse. The text before the first ``.`` of the
        file name is used as the structure id.

    Returns
    -------
    dict
        ``{pdb_id: {"residues": {...}, "disorder_residues": {...},
        "hetatm": {...}}}`` where each inner dict maps a chain id to
        ``{residue key: {atom name: [x, y, z]}}``.

    Raises
    ------
    IOError
        If *pdb_file* does not exist.
    """
    if not pdb_file.exists():
        raise IOError("file not exists")

    pdb_id = pdb_file.name.split('.')[0]
    structure = parser.get_structure(pdb_id, pdb_file)

    structure_dict = {}
    hetatm = {}
    disorder_residues = {}

    for chain in structure.get_chains():
        chain_dict = {}
        disorder_dict = {}
        hetatm_dict = {}
        # Reset per chain: sequence numbers of different chains are unrelated,
        # so the first residue of a chain must not be compared with the last
        # residue of the previous chain.
        former_code = ""

        for residue in chain.get_residues():
            res_id = residue.get_id()
            code = res_id[1]
            icode = res_id[-1]
            resname = residue.get_resname() + "_" + str(code)

            if residue.is_disordered() >= 2:
                # Biopython reports 2 for a container of alternative residues.
                # Each alternative gets its OWN coordinate dict; sharing one
                # dict would make the chain entry and the disorder entry the
                # same object holding only the last alternative's coordinates.
                for i, alternative in enumerate(residue.disordered_get_list()):
                    coords = _atom_coords(alternative)
                    if i == 0:
                        chain_dict[resname] = coords
                    else:
                        disorder_dict[resname] = coords
            elif icode == " ":
                if res_id[0][0] == " " and residue.get_resname()[-3:] in residue_codes:
                    chain_dict[resname] = _atom_coords(residue)
                else:
                    # Anything else with a blank insertion code: keep all atoms.
                    hetatm_dict[resname] = _atom_coords(residue, backbone_only=False)
            elif icode == "A" and former_code != code:
                # An "A" insertion whose number was not just seen is treated
                # as the primary residue for that sequence number.
                chain_dict[resname] = _atom_coords(residue)
            else:
                disorder_dict[resname + icode] = _atom_coords(residue)

            former_code = code

        structure_dict[chain.get_id()] = chain_dict
        disorder_residues[chain.get_id()] = disorder_dict
        hetatm[chain.get_id()] = hetatm_dict

    return {pdb_id : {"residues" : structure_dict, "disorder_residues": disorder_residues, "hetatm" : hetatm}}
def get_respos(pdbs):
    """Run ``pdb2respos`` over *pdbs* in a pool of worker processes.

    Parameters
    ----------
    pdbs : iterable of pathlib.Path
        PDB files to parse; each item is passed to ``pdb2respos``.

    Returns
    -------
    list
        One ``pdb2respos`` result per input file, as returned by
        ``Pool.map``.

    Raises
    ------
    Exception
        Whatever ``pdb2respos`` raises in a worker is re-raised here by
        ``Pool.map``.
    """
    pool = Pool()
    try:
        return pool.map(pdb2respos, pdbs)
    finally:
        # Always shut the pool down, even when a worker raised; otherwise the
        # worker processes would be left behind.
        pool.close()
        pool.join()
def to_pkl(res_pos, save_path=None):
    """Pickle *res_pos* to ``protein_respos.pkl`` inside *save_path*.

    Parameters
    ----------
    res_pos : object
        Data to serialise (any picklable object).
    save_path : pathlib.Path or str
        Directory that receives the file. It is required even though it is
        declared with a ``None`` default.

    Raises
    ------
    IOError
        If *save_path* is not given.
    """
    if save_path is None:
        raise IOError("save_path should be given")
    # Convert before joining so that a plain string directory works too;
    # applying "/" to two strings raises TypeError.
    pkl_file = Path(save_path) / "protein_respos.pkl"
    with open(pkl_file, 'wb') as f:
        pkl.dump(res_pos, f, protocol=pkl.HIGHEST_PROTOCOL)
def load_data(path):
    """Load and return the object pickled in the file at *path*.

    Progress messages are printed to standard output before and after the
    file is read.

    Parameters
    ----------
    path : pathlib.Path
        Pickle file to read; only a ``Path`` instance is accepted.

    Returns
    -------
    object
        Whatever object the pickle file contains.

    Raises
    ------
    TypeError
        If *path* is not a ``Path`` instance.
    IOError
        If *path* does not exist.
    """
    if not isinstance(path, Path):
        raise TypeError("path should be Path import from pathlib like Path(your_path)")
    if path.exists():
        print("read datas...")
        # NOTE: unpickling can execute arbitrary code -- only load files that
        # come from a trusted source.
        with path.open('rb') as handle:
            loaded = pkl.load(handle)
        print("done")
        return loaded
    raise IOError("path not exists")