-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvisualize.py
More file actions
51 lines (41 loc) · 1.84 KB
/
visualize.py
File metadata and controls
51 lines (41 loc) · 1.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import matplotlib.pyplot as plt
import os
# Number of variable sites; it is reported in the x-axis label of the plot.
variable_sites = 10000

# X values: one haplotype (line) count per dataset.
haplotypes_count = []

# Y values: one file size per dataset for each kind of file that is measured.
# Every list is filled in the same dataset order as haplotypes_count.
haplotypes_size = []
haplotypes_bzip2_size = []
encoded_haplotypes_bzip2_size = []
wg_size = []
wg_from_encoded_size = []
_BYTES_PER_MIB = 1024 ** 2


def _size_in_mib(path):
    """Return the on-disk size of *path* divided by 1024 ** 2.

    Raises OSError (e.g. FileNotFoundError) if the file cannot be stat'ed.
    """
    return os.path.getsize(path) / _BYTES_PER_MIB


def _count_lines(path):
    """Return the number of lines in the text file at *path*.

    The file is streamed line by line instead of being loaded with
    readlines(), so memory use does not grow with the size of the file.
    """
    with open(path, "r") as handle:
        return sum(1 for _ in handle)


# Gather one data point per dataset: the number of haplotypes (lines in the
# plain haplotype file) and the size of each related file on disk.
for i in [5000, 50000, 100000, 200000, 500000, 1000000]:
    haplotypes_path = "haplotypes_" + str(i) + ".txt"
    haplotype_count = _count_lines(haplotypes_path)
    haplotypes_size.append(_size_in_mib(haplotypes_path))
    haplotypes_count.append(haplotype_count)

    # Only the sizes of the remaining files are needed, so they are stat'ed
    # rather than opened.
    wg_size.append(_size_in_mib("wg_" + str(i) + ".txt"))
    wg_from_encoded_size.append(
        _size_in_mib("wg_from_encoded_" + str(i) + ".txt")
    )
    haplotypes_bzip2_size.append(
        _size_in_mib("haplotypes_" + str(i) + ".txt.bz2")
    )
    encoded_haplotypes_bzip2_size.append(
        _size_in_mib("haplotypes_encoded_" + str(i) + ".txt.bz2")
    )
# Draw every size series against the haplotype count on a single set of axes.
fig, ax = plt.subplots()

# (legend label, y values) pairs; the order fixes the colour assigned to each
# series and the order of the legend entries.
labelled_series = (
    ("haplotypes", haplotypes_size),
    ("wheeler graph from haplotypes", wg_size),
    ("wheeler graph from encoded haplotypes", wg_from_encoded_size),
    ("b2zipped encoded haplotypes", encoded_haplotypes_bzip2_size),
    ("b2zipped haplotypes", haplotypes_bzip2_size),
)
for series_label, series_sizes in labelled_series:
    ax.scatter(haplotypes_count, series_sizes, label=series_label)

# Put one tick at each measured haplotype count.
ax.set_xticks(haplotypes_count)
x_axis_title = "number of haplotypes (" + str(variable_sites) + " variable sites)"
ax.set_xlabel(x_axis_title)
ax.set_ylabel("size (MB)")
ax.legend()

# Rotate and shrink the tick labels, then write the figure to disk.
plt.xticks(rotation=37, fontsize=8)
plt.savefig("haplotypes_size_visualization.png")