-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfasta_stats.py
More file actions
executable file
·89 lines (73 loc) · 1.46 KB
/
fasta_stats.py
File metadata and controls
executable file
·89 lines (73 loc) · 1.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#! /usr/bin/env python
import sys
from pbcore.io import FastaReader
f = FastaReader(sys.argv[1])
sum_bp = -1
if len(sys.argv) == 3:
sum_bp = int(sys.argv[2])
sum = 0
lenlist = []
more1k = 0
sumncount = 0
numscaffoldwithN = 0
biggest100k = []
for r in f:
l = len(r.sequence)
if l > 1000:
more1k = more1k + 1
if l > 100000:
biggest100k.append(r.name)
ncount = r.sequence.count('N')
sumncount = sumncount + ncount
if ncount != 0:
numscaffoldwithN = numscaffoldwithN + 1
sum = sum + l
lenlist.append(l)
avglen = sum/len(lenlist)
rsum = 0
lenlist.sort()
minlen = lenlist[0]
lenlist.reverse()
maxlen = lenlist[0]
# compute N(G)50/90
for rl in lenlist:
rsum = rsum + rl
if rsum > sum/2:
n50 = rl
break
rsum = 0
for rl in lenlist:
rsum = rsum + rl
if rsum > sum*0.9:
n90 = rl
break
if sum_bp != -1:
rsum = 0
for rl in lenlist:
rsum = rsum + rl
if rsum > sum_bp/2:
ng50 = rl
break
rsum = 0
for rl in lenlist:
rsum = rsum + rl
if rsum > sum_bp*0.9:
ng90 = rl
break
else:
ng50,ng90 = n50,n90
print "Sum bp: ", sum
print "Num scaffolds: ", len(lenlist)
print "Avg len: ", avglen
print "Min len: ", minlen
print "Max len: ", maxlen
print "Scaffolds > 1k: ", more1k
print "Scaffolds with Ns: ", numscaffoldwithN
print "Num Ns: ", sumncount
print "N50: ", n50
print "N90: ", n90
print "NG50: ", ng50
print "NG90: ", ng90
#print "bigger than 100k:"
#for e in biggest100k:
# print e