-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathfilt_analysis.py
More file actions
110 lines (108 loc) · 4.39 KB
/
filt_analysis.py
File metadata and controls
110 lines (108 loc) · 4.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import os
import numpy as np
import biotite
from biotite.structure.io.pdb import PDBFile
import json
import pandas as pd
import sys
# Exactly one command-line argument is expected: the directory that is later
# listed for design folders and that receives the result CSV files.
if len(sys.argv) == 2:
    output_path = sys.argv[1]
else:
    print("Usage: python filt_analysis.py <output_path>")
    sys.exit(1)
# (one-letter, three-letter) residue codes for the 20 standard amino acids,
# listed in the order the lookup tables are populated.
_RESTYPE_PAIRS = (
    ("A", "ALA"),
    ("R", "ARG"),
    ("N", "ASN"),
    ("D", "ASP"),
    ("C", "CYS"),
    ("Q", "GLN"),
    ("E", "GLU"),
    ("G", "GLY"),
    ("H", "HIS"),
    ("I", "ILE"),
    ("L", "LEU"),
    ("K", "LYS"),
    ("M", "MET"),
    ("F", "PHE"),
    ("P", "PRO"),
    ("S", "SER"),
    ("T", "THR"),
    ("W", "TRP"),
    ("Y", "TYR"),
    ("V", "VAL"),
)

# Forward lookup: one-letter code -> three-letter code.
restype_1to3 = dict(_RESTYPE_PAIRS)
# Reverse lookup: three-letter code -> one-letter code.
restype_3to1 = {three: one for one, three in _RESTYPE_PAIRS}
# ---------------------------------------------------------------------------
# Scan <output_path>/<design>/<0..999>/ run folders, score the five ranked
# models of every run and collect the runs that pass the filters.
#
# For each run the script reads:
#   * seq.fasta                       - last line is taken as the sequence
#   * *rank_00<j>*.json               - per-residue 'plddt' list
#   * seq_relaxed_rank_00<j>_*.pdb    - model aligned with TMalign against
#                                       <design>/test.pdb and used for SASA
# and writes two tables into <output_path>:
#   * allclose.csv   - runs with plddt_flag >= 4, sasa_flag >= 4, align_flag >= 4
#   * openclose.csv  - runs with plddt_flag >= 4 that miss the above but have
#                      sasa_flag >= 2 and align_flag >= 2
# NOTE(review): the original indentation was lost; the `elif` is assumed to
# belong to the inner `if`, i.e. the pLDDT threshold gates both tables.
# ---------------------------------------------------------------------------

# Motif located in every sequence; residues at fixed offsets from its end are
# the ones whose solvent-accessible surface area is measured.
ps_site = 'TENLYFQARRAS'

allclosedf = pd.DataFrame()
openclosedf = pd.DataFrame()
dirlist = os.listdir(output_path)
allclose_flist = []
open_close_flist = []

for d in dirlist:
    # os.listdir() returns bare names, so they must be resolved against
    # output_path; testing the bare name only works when cwd == output_path.
    design_dir = os.path.join(output_path, d)
    if not os.path.isdir(design_dir):
        continue

    for i in range(0, 1000):
        # Name stored in the tables / lists (relative to output_path).
        run_name = os.path.join(d, str(i))
        run_dir = os.path.join(design_dir, str(i))
        if not os.path.isdir(run_dir):
            # Run indices are probed blindly; most of them do not exist.
            continue

        try:
            newdf = pd.DataFrame()
            newdf['name'] = [run_name]

            fasta_path = os.path.join(run_dir, 'seq.fasta')
            with open(fasta_path) as fasta:
                seq = fasta.readlines()[-1]
            print(fasta_path)

            start = seq.find(ps_site)
            if start < 0:
                # str.find() returns -1 when the motif is absent. Using -1 as
                # a slice index leaves at most one alignment character, so
                # such a run could never pass the alignment filter anyway.
                print('skipping', run_name, '- motif', ps_site, 'not found',
                      file=sys.stderr)
                continue
            end = start + len(ps_site)
            s_pos = end - 1  # 0-based index of the motif's last residue
            a_pos = end - 5  # 0-based index four residues before that

            subflist = os.listdir(run_dir)
            plddt_flag = 0
            sasa_flag = 0
            align_flag = 0

            for j in range(1, 6):
                rank_tag = 'rank_00' + str(j)
                for f in subflist:
                    if '.json' not in f or rank_tag not in f:
                        continue

                    # Mean pLDDT over the residues preceding the motif.
                    with open(os.path.join(run_dir, f), 'r') as handle:
                        jsondata = json.load(handle)
                    plddt = np.mean(jsondata['plddt'][:start])
                    print(plddt)
                    if plddt > 75:
                        plddt_flag += 1
                    newdf['rank_' + str(j) + 'plddt'] = [plddt]

                    # Structural alignment of the relaxed model against the
                    # design's reference structure.
                    # NOTE: the command runs through the shell so that the
                    # glob is expanded; paths containing spaces or shell
                    # metacharacters will break it.
                    command = (
                        'TMalign '
                        + os.path.join(run_dir,
                                       'seq_relaxed_' + rank_tag + '_**.pdb')
                        + ' '
                        + os.path.join(design_dir, 'test.pdb')
                        + ' '
                    )
                    with os.popen(command) as align:
                        # Fourth line from the end of the TMalign output,
                        # from the motif start onwards.
                        alignpart = align.readlines()[-4][start:]
                    print(alignpart)
                    align_num = alignpart.count(':')
                    miss_num = alignpart.count(' ')

                    # Derive the relaxed PDB name from the JSON name: replace
                    # the second '_'-separated field and swap the extension.
                    tmp = f.split('_')
                    tmp[1] = 'relaxed'
                    pdb_file = '_'.join(tmp)[:-4] + 'pdb'
                    structure = PDBFile.read(
                        os.path.join(run_dir, pdb_file)
                    ).get_structure()[0]
                    sasa = biotite.structure.sasa(structure)
                    # res_id is compared against the 0-based positions + 1.
                    A_idx = structure.res_id == a_pos + 1
                    S_idx = structure.res_id == s_pos + 1
                    Asasa = np.nansum(sasa[A_idx])
                    Ssasa = np.nansum(sasa[S_idx])

                    newdf['rank_' + str(j) + 'SASA_A'] = [Asasa]
                    newdf['rank_' + str(j) + 'SASA_S'] = [Ssasa]
                    newdf['rank_' + str(j) + 'align_num'] = [align_num]
                    newdf['rank_' + str(j) + 'miss_num'] = [miss_num]
                    print(align_num, miss_num)

                    if Asasa < 5 and Ssasa < 5:
                        sasa_flag += 1
                    if align_num > 15 and miss_num < 5:
                        align_flag += 1

            newdf['sasaflag'] = [sasa_flag]
            # Previously this column was filled with sasa_flag by mistake.
            newdf['alignflag'] = [align_flag]
            print(sasa_flag, align_flag)

            if plddt_flag >= 4:
                if sasa_flag >= 4 and align_flag >= 4:
                    print('all_close', i, j, d, alignpart, plddt)
                    allclose_flist.append(run_name)
                    allclosedf = pd.concat([allclosedf, newdf],
                                           ignore_index=True)
                elif sasa_flag >= 2 and align_flag >= 2:
                    print('colose_open', i, j, d, alignpart, plddt)
                    open_close_flist.append(run_name)
                    openclosedf = pd.concat([openclosedf, newdf],
                                            ignore_index=True)
        except Exception as err:
            # Best effort: an incomplete or malformed run must not abort the
            # whole scan, but report it instead of hiding it. Unlike a bare
            # `except:`, this lets KeyboardInterrupt / SystemExit through.
            print('skipping', run_name, '-', repr(err), file=sys.stderr)
            continue

openclosedf.to_csv(os.path.join(output_path, 'openclose.csv'))
allclosedf.to_csv(os.path.join(output_path, 'allclose.csv'))