-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdepth.distributions.py
More file actions
82 lines (68 loc) · 2.94 KB
/
depth.distributions.py
File metadata and controls
82 lines (68 loc) · 2.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#-------------------------------------------------------------------------------
# Purpose: Readdepth per sample
#-------------------------------------------------------------------------------
import sys
invcf = sys.argv[1]
src =open(invcf, "r") #
out1 =open("depth_dist_"+invcf, "w")
info={}
Moms={}
in1 =open("Sample.key.txt","r")
for line_idx, line in enumerate(in1):
cols = line.replace('\n', '').split('\t')
# vcf_pos sample FamNo type
# 530 6_c_S111 6 c
# 531 6_m_S305 6 m
if cols[2]=="0": # male pools
info[int(cols[0])]=line.replace('\n', '')
elif cols[3]=="m":
Moms[int(cols[0])]=1
in1.close()
depths=[[],[],[],[]]
for line_idx, line in enumerate(src):
cols = line.replace('\n', '').split('\t')
if cols[0]=="#CHROM":
pass # out1.write(line)
else:
# chr2L 10000435 . T G 999 . DP=10943;VDB=0.952331;SGB=126.197;RPB=0.985224;MQB=0.9975;MQSB=0.999998;BQB=0.0889642;MQ0F=0.000913826;ICB=0.608486;HOB=0.0150187;AC=120;AN=1258;DP4=5088,936,391,68;MQ=59 GT:PL:AD 1/1:255,24,0:0,8 0/1:51,0,82:2,1 0/1:168,0,152:4,5 0/1:147,12,0:0,4 0/0:0,12,171:4,0 0/0:0,6,90:2,0 0/0:0,42,255:14,0 0/0:0,12,184:4,0 0/0:0,33,255:11,0 ./.:0,0,0:0,0 0/1:110,0,178:5,4 0/1:42,0,200:5,1 ./.:0,0,0:0,0 0/0:0,12,184:4,00/1:217,0,116:5,8 ./.:0,0,0:0,0 ./.:0,0,0:0,0 ./.:0,0,0:0,0 0/0:0,75,255:25,0 ./.:0,0,0:0,0 0/0:0,36,255:12,0 0/0:0,6,110:2,0 0/0:0,18,221:6,0 ./.:0,0,0:0,0 0/1:128,0,255:13,4 0/0:0,6,74:2,0 0/0:0,33,255:11,0 ...
cdx = 0
for j in info:
vv=cols[j].split(":") # 0/0:0,15,151:5,0
if len(vv) == 3 and vv[0] != './.':
Rcc=int(vv[2].split(",")[0])
Acc=int(vv[2].split(",")[1])
cdx+=(Rcc+Acc)
if cols[0]=="chr2L" or cols[0]=="chr3L" or cols[0]=="chr2R" or cols[0]=="chr3R":
depths[0].append(cdx)
elif cols[0]=="chrX":
depths[1].append(cdx)
cdx = 0
for j in Moms:
vv=cols[j].split(":") # 0/0:0,15,151:5,0
if len(vv) == 3 and vv[0] != './.':
Rcc=int(vv[2].split(",")[0])
Acc=int(vv[2].split(",")[1])
cdx+=(Rcc+Acc)
if cols[0]=="chr2L" or cols[0]=="chr3L" or cols[0]=="chr2R" or cols[0]=="chr3R":
depths[2].append(cdx)
elif cols[0]=="chrX":
depths[3].append(cdx)
nx={}
for j in range(4):
depths[j].sort()
nx[j] = len(depths[j])
print("no snps per set",nx)
for j in range(1,1000):
out1.write( str(float(j)/1000.0) )
for k in range(4):
val = int(float(j*nx[k])/1000.0)
if nx[k]>0:
out1.write('\t'+str(depths[k][val]))
out1.write('\n')
out1.write( "max" )
for k in range(4):
if nx[k]>0:
out1.write('\t'+str(depths[k][nx[k]-1]))
out1.write('\n')