-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstructure_data.py
More file actions
210 lines (175 loc) · 7.09 KB
/
structure_data.py
File metadata and controls
210 lines (175 loc) · 7.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
# builtin modules
import os
import logging
# third party modules
import numpy as np
# geobind modules
from get_atom_kdtree import getAtomKDTree
from get_surface_residues import getSurfaceResidues
class StructureData(object):
    """Wrapper around a Biopython entity that adds cached derived data.

    The wrapped entity is stored in ``self.structure``. Container-style access
    (``in``, ``[]``, iteration) and most ``get_*`` methods are forwarded to it.
    Derived data (atom list, atom KD-tree, surface residues) is computed
    lazily and stored in ``self.cache``.
    """

    def __init__(self, structure, name='structure', path='.'):
        """Load a structure from file, or wrap an existing Biopython entity.

        Args:
            structure: Either a file name (``str``) ending in ``.pdb``,
                ``.ent`` or ``.cif``, or a ``Bio.PDB.Entity.Entity`` instance
                that is used as-is.
            name: Identifier passed to the parser and stored as ``self.name``.
            path: Directory joined in front of ``structure`` when it is a
                file name.

        Raises:
            ModuleNotFoundError: If BioPython cannot be imported.
            ValueError: If the file extension is not recognised, or
                ``structure`` is neither a ``str`` nor an ``Entity``.
        """
        try:
            from Bio.PDB import PDBParser, MMCIFParser
            from Bio.PDB.Entity import Entity
        except ModuleNotFoundError as err:
            raise ModuleNotFoundError("BioPython is a required dependency for structure-related functions!") from err

        if isinstance(structure, str):
            # The file type is whatever follows the last '.' in the name.
            file_type = structure.split('.')[-1].lower()
            if file_type in ('pdb', 'ent'):
                # load a PDB file
                parser = PDBParser(PERMISSIVE=1, QUIET=True)
            elif file_type == 'cif':
                # load MMCIF file
                parser = MMCIFParser(QUIET=True)
            else:
                raise ValueError("Unknown filetype for structure file name: {}".format(structure))
            self.structure = parser.get_structure(name, os.path.join(path, structure))
        elif isinstance(structure, Entity):
            # use structure as-is
            self.structure = structure
        else:
            raise ValueError("Unknown type for input argument 'structure': {}".format(str(structure)))

        # properties
        self.name = name

        # cachable properties
        self.cache = {}
        # Retained for backward compatibility; lazily computed values are
        # stored in ``self.cache``.
        self._atom_KDTree = None
        self._atom_list = None
        self._surface_residues = None

    def __contains__(self, key):
        """Return whether ``key`` is contained in the wrapped entity."""
        return key in self.structure

    def __getitem__(self, key):
        """Return the child of the wrapped entity stored under ``key``."""
        return self.structure[key]

    def __iter__(self):
        """Iterate over the direct children of the wrapped entity."""
        yield from self.structure

    def _invalidate_cache(self):
        """Drop all cached derived data after the wrapped entity is mutated."""
        self.cache.clear()
        self._surface_residues = None

    def _apply_bfactor(self, bfactor_key):
        """Copy ``atom.xtra[bfactor_key]`` into ``atom.bfactor`` for every atom.

        Atoms without the key get a B-factor of 0.0. Does nothing when
        ``bfactor_key`` is None.
        """
        if bfactor_key is None:
            return
        for atom in self.get_atoms():
            atom.bfactor = atom.xtra.get(bfactor_key, 0.0)

    @classmethod
    def slice(cls, obj, selection, name='slice'):
        """Create a new Structure object 'S2' from a slice of the current one, 'S1'.

        Args:
            obj: Source object 'S1'; indexed as ``obj[model_id][chain_id][residue_id]``.
            selection: Iterable of indexable items defining which descendants
                of 'S1' are stored in 'S2'. Index 1 is used as the model id,
                index 2 as the chain id and index 3 as the residue id
                (presumably Biopython full ids - confirm against callers).
            name: Identifier of the new structure.

        Returns:
            A new instance of ``cls`` wrapping copies of the selected residues.
        """
        from Bio.PDB.Structure import Structure
        from Bio.PDB.Model import Model
        from Bio.PDB.Chain import Chain

        selection = list(selection)  # iterated twice below
        ent = Structure(name)  # Biopython structure object

        # Determine which model/chain containers are needed to hold the slice.
        models = {}
        for item in selection:
            models.setdefault(item[1], set()).add(item[2])

        # Create model/chains to store slice
        for mid, chain_ids in models.items():
            ent.add(Model(mid))
            for cid in chain_ids:
                ent[mid].add(Chain(cid))

        # Add residues to slice
        for item in selection:
            mid, cid, rid = item[1], item[2], item[3]
            residue = obj[mid][cid][rid].copy()
            # The copy must not point back at the source chain.
            residue.detach_parent()
            ent[mid][cid].add(residue)

        return cls(ent, name=name)

    @property
    def atom_list(self):
        """List of all atoms in the wrapped entity (computed once, then cached)."""
        if "atom_list" not in self.cache:
            self.cache["atom_list"] = list(self.get_atoms())
        return self.cache["atom_list"]

    @property
    def atom_KDTree(self):
        """KD-tree built by ``getAtomKDTree`` over ``atom_list`` (cached)."""
        if "kdtree" not in self.cache:
            self.cache["kdtree"] = getAtomKDTree(self.atom_list, engine='biopython')
        return self.cache["kdtree"]

    def get_parent(self):
        """Return the parent of the wrapped entity."""
        return self.structure.get_parent()

    def add(self, item):
        """Add ``item`` as a child of the wrapped entity and reset cached data."""
        self.structure.add(item)
        self._invalidate_cache()

    def get_level(self):
        """Return the level code of the wrapped entity (e.g. 'S', 'M', 'C', 'A')."""
        return self.structure.get_level()

    def get_models(self):
        """Return the models of the wrapped entity.

        Raises:
            AttributeError: If the wrapped entity is not at 'Structure' level.
        """
        if self.get_level() == 'S':
            return self.structure.get_models()
        raise AttributeError("This method is only defined for 'Structure' level objects!")

    def get_chains(self):
        """Return the chains of the wrapped entity.

        Raises:
            AttributeError: If the wrapped entity is below 'Model' level.
        """
        if self.get_level() in ('S', 'M'):
            return self.structure.get_chains()
        raise AttributeError("This method is only defined for 'Structure' and 'Model' level objects!")

    def get_residues(self):
        """Return the residues of the wrapped entity.

        Raises:
            AttributeError: If the wrapped entity is below 'Chain' level.
        """
        if self.get_level() in ('S', 'M', 'C'):
            return self.structure.get_residues()
        raise AttributeError("This method is only defined for 'Structure', 'Model' and 'Chain' level objects!")

    def get_atoms(self):
        """Return the atoms of the wrapped entity.

        For an atom-level entity the result is a one-element list holding the
        entity itself; otherwise it is whatever ``structure.get_atoms()`` returns.
        """
        if self.get_level() == 'A':
            return [self.structure]
        return self.structure.get_atoms()

    def get_residue(self, res_id, chain_id=None, mi=0):
        """Look up a residue, indexing from the level of the wrapped entity.

        Args:
            res_id: Residue id used to index the chain.
            chain_id: Chain id; used at 'Structure' and 'Model' level.
            mi: Model id; used at 'Structure' level only.

        Returns:
            The residue, or None when the wrapped entity is below 'Chain' level.
        """
        level = self.get_level()
        if level == 'S':
            return self.structure[mi][chain_id][res_id]
        if level == 'M':
            return self.structure[chain_id][res_id]
        if level == 'C':
            return self.structure[res_id]
        return None

    def detach_child(self, item):
        """Detach the child ``item`` from the wrapped entity and reset cached data."""
        self.structure.detach_child(item)
        self._invalidate_cache()

    def get_surface_residues(self, hydrogens=False, area_key='sesa'):
        """Return the result of ``getSurfaceResidues`` for the wrapped entity.

        Results are cached per ``(hydrogens, area_key)`` combination so that a
        call with different arguments is not answered with a stale result.
        """
        key = ("surface_residues", hydrogens, area_key)
        if key not in self.cache:
            self.cache[key] = getSurfaceResidues(self.structure, area_key=area_key, hydrogens=hydrogens)
        # Keep the legacy attribute pointing at the most recent result.
        self._surface_residues = self.cache[key]
        return self._surface_residues

    def getNearestNeighbor(self, atom, cutoff=3.0, eps=1e-5, hydrogens=True):
        """Return the closest atom to ``atom`` among the KD-tree search hits.

        Args:
            atom: Query atom; only its ``coord`` is used.
            cutoff: Search radius passed to the KD-tree.
            eps: Hits at distance <= eps are ignored (presumably to skip the
                query atom itself).
            hydrogens: When False, hits whose ``element`` is 'H' are ignored.

        Returns:
            The nearest qualifying atom, or None if there is none.
        """
        nearest = None
        min_dist = float('inf')
        for candidate in self.atom_KDTree.search(atom.coord, cutoff):
            if not hydrogens and candidate.element == 'H':
                continue
            dist = np.linalg.norm(atom.coord - candidate.coord)
            if eps < dist < min_dist:
                min_dist = dist
                nearest = candidate
        return nearest

    def save(self, outfile=None, bfactor_key=None):
        """Write the wrapped entity to an mmCIF file and return the file name.

        Args:
            outfile: Output file name; defaults to ``self.name + ".mmcif"``.
            bfactor_key: If given, each atom's B-factor is overwritten with
                ``atom.xtra[bfactor_key]`` (0.0 when the key is absent)
                before writing.
        """
        from Bio.PDB.mmcifio import MMCIFIO

        if outfile is None:
            outfile = self.name + ".mmcif"
        self._apply_bfactor(bfactor_key)

        # write structure to file
        logging.debug("Saving mmcif file: %s", outfile)
        writer = MMCIFIO()
        writer.set_structure(self.structure)
        writer.save(outfile)
        return outfile

    def save_pdb(self, outfile=None, bfactor_key=None):
        """Write the wrapped entity to a PDB file and return the file name.

        Args:
            outfile: Output file name; defaults to ``self.name + ".pdb"``.
            bfactor_key: If given, each atom's B-factor is overwritten with
                ``atom.xtra[bfactor_key]`` (0.0 when the key is absent)
                before writing.
        """
        from Bio.PDB import PDBIO

        if outfile is None:
            outfile = self.name + ".pdb"
        self._apply_bfactor(bfactor_key)

        # write structure to file
        logging.debug("Saving pdb file: %s", outfile)
        writer = PDBIO()
        writer.set_structure(self.structure)
        writer.save(outfile)
        return outfile