-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexcel.py
More file actions
120 lines (100 loc) · 4.9 KB
/
excel.py
File metadata and controls
120 lines (100 loc) · 4.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import openpyxl
from openpyxl.styles import Font
from openpyxl.styles import PatternFill
from openpyxl.styles import Border, Side
import utils
taxid2name = utils.get_taxid_to_scientificname()
def write_excel(data, filename, header_bg="eeffed"):
lengths = [len(v) for v in data.values()]
if len(set(lengths)) != 1:
length_check = {k: len(v) for k, v in data.items()}
print(f"Column lengths: {length_check}")
raise ValueError("All columns must have the same number of rows.")
wb = openpyxl.Workbook()
ws = wb.active
headers = list(data.keys())
ws.append(headers)
for row in range(len(next(iter(data.values())))):
ws.append([data[header][row] for header in headers])
border = Side(border_style="thin")
for cell in ws["1:1"]:
cell.font = Font(bold=True)
cell.fill = PatternFill('solid', start_color=header_bg)
cell.border = Border(left=border, right=border, top=border, bottom=border)
for column in ws.columns:
max_length = 0
column_letter = column[0].column_letter
for cell in column:
if cell.value and len(str(cell.value)) > max_length:
max_length = len(cell.value)
ws.column_dimensions[column_letter].width = min(50, max_length + 2)
wb.save(filename)
def add_sheet(data, filename, sheet_name, header_bg="eeffed"):
lengths = [len(v) for v in data.values()]
if len(set(lengths)) != 1:
length_check = {k: len(v) for k, v in data.items()}
print(f"Column lengths: {length_check}")
raise ValueError("All columns must have the same number of rows.")
try:
wb = openpyxl.load_workbook(filename)
except FileNotFoundError:
wb = openpyxl.Workbook()
if len(wb.sheetnames) == 1 and wb.active.max_row <= 1:
wb.remove(wb.active)
if sheet_name in wb.sheetnames:
del wb[sheet_name]
ws = wb.create_sheet(sheet_name)
headers = list(data.keys())
ws.append(headers)
for row in range(len(next(iter(data.values())))):
ws.append([data[header][row] for header in headers])
border = Side(border_style="thin")
for cell in ws["1:1"]:
cell.font = Font(bold=True)
cell.fill = PatternFill('solid', start_color=header_bg)
cell.border = Border(left=border, right=border, top=border, bottom=border)
# Auto-adjust column widths
for column in ws.columns:
max_length = 0
column_letter = column[0].column_letter
for cell in column:
if cell.value and len(str(cell.value)) > max_length:
max_length = len(str(cell.value))
ws.column_dimensions[column_letter].width = min(50, max_length + 2)
wb.save(filename)
def add_hit(output_data, hit):
output_data["Query accession"].append(hit.get("qseqid", ""))
output_data["Subject accession"].append(hit.get("sseqid",""))
output_data["Subject description"].append(hit.get("stitle",""))
output_data["Subject species (taxid)"].append(str(hit.get("staxid") or ""))
output_data["Subject species (name)"].append(taxid2name.get(str(hit.get("staxid")), "") if hit.get("staxid") else "")
output_data["Gene Name"].append(utils.gene_name_from_stitle(hit.get("stitle","")))
output_data["Bitscore"].append(f"{hit.get('bits',0):.1f}")
output_data["Evalue"].append(f"{hit.get('evalue',0):.1e}")
output_data["Identity (%)"].append(f"{hit.get('pident',0):.2f}")
output_data["Similarity (%)"].append(f"{hit.get('ppos',0):.2f}")
output_data["Query coverage (%)"].append(f"{hit.get('_qcov',0)*100:.2f}")
output_data["Subject coverage (%)"].append(f"{hit.get('_scov',0)*100:.2f}")
output_data["Common ancestor (rank)"].append(hit.get("common_ancestor_rank",""))
output_data["Common ancestor (taxID)"].append(hit.get("common_ancestor_taxid",""))
output_data["Common ancestor (name)"].append(hit.get("common_ancestor_name",""))
output_data["Hit found"].append("True")
return output_data
def add_no_hit(output_data, qid):
output_data["Query accession"].append(qid)
output_data["Subject accession"].append("")
output_data["Subject description"].append("")
output_data["Subject species (taxid)"].append("")
output_data["Subject species (name)"].append("")
output_data["Gene Name"].append("")
output_data["Bitscore"].append("")
output_data["Evalue"].append("")
output_data["Identity (%)"].append("")
output_data["Similarity (%)"].append("")
output_data["Query coverage (%)"].append("")
output_data["Subject coverage (%)"].append("")
output_data["Common ancestor (rank)"].append("")
output_data["Common ancestor (taxID)"].append("")
output_data["Common ancestor (name)"].append("")
output_data["Hit found"].append("False")
return output_data