-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstats.py
More file actions
128 lines (107 loc) · 4.3 KB
/
stats.py
File metadata and controls
128 lines (107 loc) · 4.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import os
import re
def generate_combined_figure(stats, output_file='combined_results.png'):
    """Build a figure with a per-step summary table above a progress plot.

    Args:
        stats: Mapping of step names to per-step dicts. Only keys starting
            with ``"Step"`` are used; each must provide ``rank``,
            ``taxon_name``, ``taxon_id``, ``dbsize``, ``prots_with_hit`` and
            ``elapsed_time`` (converted with ``float`` and treated as
            minutes). ``stats['Step 1']['nb_query']`` is the denominator
            used for every percentage.
        output_file: Path the rendered figure is written to.

    Returns:
        The matplotlib figure that was saved.

    Raises:
        KeyError: If ``'Step 1'``, ``nb_query`` or a per-step field is missing.
    """
    total_nb_query = stats['Step 1']['nb_query']

    # Seed every column with a placeholder row for step "0"; it becomes the
    # first table row and the first point of both plotted curves.
    stats_dict = {
        'step': ['0'],
        'rank': ['NA'],
        'taxon_name': ['NA'],
        'taxon_id': ['NA'],
        'dbsize': ['NA'],
        'prots_with_hit': [0],
        '%_prots_with_hit': [0],
        'elapsed_time_min': [0.0],
        'elapsed_time_str': ['0m'],
    }

    for step, step_stats in stats.items():
        if not step.startswith("Step"):
            continue
        elapsed_time_min = float(step_stats['elapsed_time'])
        prots_with_hit = step_stats['prots_with_hit']
        # With zero queries there is nothing to name: report 0% instead of
        # failing with ZeroDivisionError.
        if total_nb_query:
            percent_with_hit = round(100 * prots_with_hit / total_nb_query)
        else:
            percent_with_hit = 0

        stats_dict['step'].append(step.replace("Step ", ""))
        stats_dict['rank'].append(step_stats['rank'])
        stats_dict['taxon_name'].append(step_stats['taxon_name'])
        stats_dict['taxon_id'].append(step_stats['taxon_id'])
        stats_dict['dbsize'].append(step_stats['dbsize'])
        stats_dict['prots_with_hit'].append(prots_with_hit)
        stats_dict['%_prots_with_hit'].append(percent_with_hit)
        stats_dict['elapsed_time_min'].append(elapsed_time_min)
        stats_dict['elapsed_time_str'].append(format_elapsed_time(elapsed_time_min))

    # Author's sizing note (translated; presumably refers to the number of
    # rows -- TODO confirm):
    #   between 0 and 20  -> height=10 and a 40-60 ratio
    #   between 21 and 30 -> undecided
    # The size and ratios below are currently fixed regardless of row count.
    fig = plt.figure(figsize=(12, 10))
    gs = fig.add_gridspec(2, 1, height_ratios=[0.6, 0.4])

    ax_table = fig.add_subplot(gs[0, 0])
    create_table(stats_dict, ax_table)

    ax_plot = fig.add_subplot(gs[1, 0])
    create_plot(stats_dict, ax_plot)

    # Work on ``fig`` directly rather than through pyplot's implicit
    # "current figure", so the right figure is laid out and saved even if
    # another figure becomes current in the meantime.
    fig.tight_layout()
    fig.savefig(output_file, dpi=300, bbox_inches='tight')
    print(f"Combined figure saved as {output_file}")
    return fig
def format_elapsed_time(minutes, pos=None):
    """Format a duration given in minutes as ``"<M>m"`` or ``"<H>h <M>m"``.

    The value is rounded to the nearest 0.1 minute *before* it is split into
    hours and minutes, so a value such as 59.96 is rendered as ``"1h 0m"``
    rather than ``"60.0m"``. Whole-minute values are printed without a
    decimal part.

    Args:
        minutes: Duration in minutes; any real number (``int`` or ``float``).
        pos: Unused. Accepted so the function can be passed to
            ``FuncFormatter``, which calls its formatter with ``(x, pos)``.

    Returns:
        The formatted duration string.
    """
    # Coerce to float so plain ints work on every Python version, and round
    # up front so the hour/minute split sees the value that will be printed.
    minutes = round(float(minutes), 1)

    if minutes >= 60:
        hours, remainder = divmod(minutes, 60)
        prefix = f"{int(hours)}h "
    else:
        prefix, remainder = "", minutes

    precision = 0 if remainder.is_integer() else 1
    return f"{prefix}{remainder:.{precision}f}m"
def create_table(stats_dict, ax):
    """Draw the per-step summary as a styled table on ``ax``.

    Args:
        stats_dict: Column-oriented data; the lists under ``step``, ``rank``,
            ``taxon_name``, ``taxon_id``, ``dbsize``, ``prots_with_hit``,
            ``%_prots_with_hit`` and ``elapsed_time_str`` are read by index,
            one table row per entry of ``step``.
        ax: Axes-like object the table is drawn on; its own axis is hidden.

    Returns:
        The table object returned by ``ax.table``.
    """
    column_titles = [
        "Step",
        "Taxonomic Rank",
        "Taxon Name",
        "Taxon ID",
        "Database size\n(#seqs)",
        "# Named proteins",
        "Cumulative\nelapsed time"
    ]
    # Columns copied into the row unchanged, in display order.
    verbatim_columns = ('step', 'rank', 'taxon_name', 'taxon_id', 'dbsize')

    def build_row(idx):
        # One display row: verbatim cells, then "count (pct%)", then the time.
        cells = [stats_dict[key][idx] for key in verbatim_columns]
        cells.append(
            f"{stats_dict['prots_with_hit'][idx]} ({stats_dict['%_prots_with_hit'][idx]}%)"
        )
        cells.append(stats_dict['elapsed_time_str'][idx])
        return cells

    rows = [build_row(idx) for idx in range(len(stats_dict['step']))]

    ax.axis('tight')
    ax.axis('off')

    table = ax.table(
        cellText=rows,
        colLabels=column_titles,
        cellLoc='left',
        loc='center',
        colWidths=[0.1, 0.2, 0.3, 0.1, 0.2, 0.2, 0.2]
    )
    table.scale(1, 1.5)
    table.auto_set_font_size(False)
    table.set_fontsize(11)

    for (row_idx, _col_idx), cell in table.get_celld().items():
        if row_idx == 0:
            # Header row: bold white text on dark green, 1.5x taller.
            cell.set_text_props(weight='bold', color='white')
            cell.set_facecolor('darkgreen')
            cell.set_height(cell.get_height() * 1.5)
            continue
        if row_idx % 2:
            # Shade odd-numbered body rows for a striped look.
            cell.set_facecolor('lightgrey')

    return table
def create_plot(stats_dict, ax):
    """Plot named-protein percentage and cumulative time against the step.

    The percentage curve is drawn on ``ax`` (left y-axis, green). The elapsed
    time curve is drawn on a twin axis sharing the same x-axis (right y-axis,
    blue), with ticks rendered by ``format_elapsed_time``.

    Args:
        stats_dict: Column-oriented data; reads the ``step``,
            ``%_prots_with_hit`` and ``elapsed_time_min`` lists.
        ax: Axes to draw on.

    Returns:
        The ``ax`` that was passed in.
    """
    steps = stats_dict['step']
    percent_color = 'tab:green'
    time_color = 'tab:blue'

    # Left axis: share of proteins that received a name.
    ax.plot(
        steps,
        stats_dict['%_prots_with_hit'],
        'o-',
        color=percent_color,
        label='Named proteins (%)',
    )
    ax.set_ylabel('Named proteins (%)', color=percent_color)
    ax.tick_params(axis='y', labelcolor=percent_color)

    # Right axis: cumulative elapsed time, with human-readable tick labels.
    time_ax = ax.twinx()
    time_ax.plot(
        steps,
        stats_dict['elapsed_time_min'],
        'o-',
        color=time_color,
        label='Cumulative Elapsed Time',
    )
    time_ax.yaxis.set_major_formatter(FuncFormatter(format_elapsed_time))
    time_ax.set_ylabel('Cumulative Elapsed Time', color=time_color)
    time_ax.tick_params(axis='y', labelcolor=time_color)

    ax.set_xlabel('Step (refer to table above)')
    return ax