-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathNull.var.male.pools.py
More file actions
107 lines (93 loc) · 3.59 KB
/
Null.var.male.pools.py
File metadata and controls
107 lines (93 loc) · 3.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import sys
from math import sqrt,asin,sin
chrom = sys.argv[1]
infile = "BCF."+chrom+".vcf"
out2=open(chrom+".nullvar.txt","w")
out1=open(chrom+".pairdz.byfreq.txt","w")
if chrom == "chrX":
MPoolDepth = [430,882]
else:
MPoolDepth = [865,1698]
min2 = 100 # lowest count to include a sample in null variance estimation
minMAF = 0.05
zlow = 2.0*asin(sqrt(minMAF))
zhigh= 2.0*asin(sqrt(1.0-minMAF))
print("lowz,highz",zlow,zhigh)
vcflines={}
data={}
in1 =open("male.pools.txt", "r") #
for line_idx, line in enumerate(in1):
cols = line.replace('\n', '').split('\t')
try:
vcflines[cols[2]].append(int(cols[0]))
data[cols[2]]=[] # r,a,m,z
except KeyError:
vcflines[cols[2]]=[int(cols[0])]
in1.close()
stats={"AB":[0,0,0],"CD":[0,0,0]}
changedist=[[0,0] for j in range(51)]
ctx=[0,0]
src =open(infile, "r") #
for line_idx, line in enumerate(src):
cols = line.replace('\n', '').split('\t')
if cols[0]=="#CHROM":
pass
elif cols[0]==chrom:
# chr2L 10000016 . C A 999 . DP=12589;VDB=0.714727;SGB=1128.3;RPB=0.879548;MQB=0.998021;MQSB=0.98709;BQB=0.514285;MQ0F=0.00111208;ICB=2.28618e-08;HOB=0.116428;AC=775;AN=1296;DP4=2249,421,4055,647;MQ=59 GT:PL:AD 1/1:255,51,0:0,17 1/1:235,18,0:0,6
total_male_depth=0
for letter in vcflines:
data[letter]=[0,0,0,0]
for j in vcflines[letter]:
vv=cols[j].split(":") # 0/0:0,15,151:5,0
if len(vv) == 3 and vv[0] != './.':
data[letter][0]+=int(vv[2].split(",")[0])
data[letter][1]+=int(vv[2].split(",")[1])
m = data[letter][0]+data[letter][1]
total_male_depth+=m
px = float(data[letter][0])/float(m)
z = 2.0*asin(sqrt(px))
data[letter][2]=m
data[letter][3]=z
if total_male_depth>=MPoolDepth[0] and total_male_depth<=MPoolDepth[1]:
z="AB"
m1=data["A"][2]
m2=data["B"][2]
if m1>=min2 and m2>=min2:
z1=data["A"][3]
z2=data["B"][3]
zbar=(z1+z2)/2.0
if zbar>zlow and zbar<zhigh:
stats[z][0]+=1
stats[z][1]+= (z1-z2)**2.0
stats[z][2]+= 1.0/float(m1)+1.0/float(m2)
p = sin(zbar/2.0)**2.0
pcat = min( int(p*100),int((1-p)*100) )
changedist[pcat][0]+=1
changedist[pcat][1]+=abs(z1-z2)
z="CD"
m1=data["C"][2]
m2=data["D"][2]
if m1>=min2 and m2>=min2:
z1=data["C"][3]
z2=data["D"][3]
zbar=(z1+z2)/2.0
if zbar>zlow and zbar<zhigh:
stats[z][0]+=1
stats[z][1]+= (z1-z2)**2.0
stats[z][2]+= 1.0/float(m1)+1.0/float(m2)
p = sin(zbar/2.0)**2.0
pcat = min( int(p*100),int((1-p)*100) )
changedist[pcat][0]+=1
changedist[pcat][1]+=abs(z1-z2)
for z in stats:
x1=stats[z][1]/float(stats[z][0])
x2=stats[z][2]/float(stats[z][0])
print (z,stats[z][0],"mean dz2",x1,"readdepth_var",x2,"rdprop",x2/x1)
nv_init = x1-x2
out2.write(z+'\t'+str(nv_init)+'\n')
for pcat in range(51):
if changedist[pcat][0]>0:
changedist[pcat][1]=changedist[pcat][1]/float(changedist[pcat][0])
out1.write(str(pcat)+'\t'+str(changedist[pcat][0])+'\t'+str(changedist[pcat][1])+'\n')