-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathseparate_pop_bedfiles.py
More file actions
53 lines (44 loc) · 1.17 KB
/
separate_pop_bedfiles.py
File metadata and controls
53 lines (44 loc) · 1.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
from __future__ import print_function
import sys
import gzip
# Split a gzip-compressed, tab-separated bed-like file into one gzip file per
# population group (EUR / ASN / AFR).
#
# Usage: separate_pop_bedfiles.py <population_file> <bedfile.gz>
#
# Sample IDs are handled as bytes throughout: the bed file is read in binary
# mode so that every line is copied to its output byte-for-byte, and the
# population file is read in binary mode too so both sides compare equal on
# Python 2 and Python 3 alike.

# Population labels accepted in column 2 of the population file, mapped to
# the output group (which is also the output-file suffix) for that sample.
POP_TO_GROUP = {
    b'EUR': 'EUR',
    b'ASN': 'ASN',
    b'EAS': 'ASN',
    b'AFR': 'AFR',
    b'YRI': 'AFR',
}

# Order in which groups are consulted when routing a bed line.  A sample ID
# listed under several groups is written to the first matching group only.
GROUP_ORDER = ('EUR', 'ASN', 'AFR')


def strip_gz_suffix(path):
    """Return *path* without a trailing ``.gz``; other paths are unchanged.

    ``str.strip('.gz')`` must not be used for this: it removes any run of the
    characters ``.``, ``g`` and ``z`` from *both* ends, so ``chr1.seg.gz``
    would become ``chr1.se`` and ``./out/x.bed.gz`` would become
    ``/out/x.bed``.
    """
    path = str(path)
    suffix = '.gz'
    if path.endswith(suffix):
        return path[:-len(suffix)]
    return path


def read_population_groups(pop_path):
    """Read the population file and return ``{group: set(sample_id)}``.

    Each data line is tab-separated with the sample ID in column 1 and the
    population label in column 2.  Labels not listed in ``POP_TO_GROUP`` are
    ignored, as are blank lines and lines with fewer than two columns.

    The returned dict always contains every name in ``GROUP_ORDER``; the
    sample IDs are ``bytes``.
    """
    groups = dict((name, set()) for name in GROUP_ORDER)
    with open(pop_path, 'rb') as popfile:
        for line in popfile:
            # Header detection inherited from the original script.
            # NOTE(review): this also skips any data row that contains
            # "samp" anywhere (e.g. an ID such as "sample_01") - confirm
            # that this is intended.
            if b'samp' in line:
                continue
            fields = line.strip().split(b'\t')
            if len(fields) < 2:
                continue
            group = POP_TO_GROUP.get(fields[1])
            if group is not None:
                groups[group].add(fields[0])
    return groups


def split_bed_by_population(bed_path, groups, out_prefix=None):
    """Copy each line of the gzip file *bed_path* into its group's output.

    The sample ID of a line is the part of its first tab-separated column
    that precedes the first ``:``.  Lines whose sample ID belongs to no group
    are dropped.  Output files are named ``<out_prefix>_<GROUP>.gz`` and are
    always created, even when no line is routed to them.

    *groups* is the mapping returned by ``read_population_groups``.
    *out_prefix* defaults to *bed_path* with its ``.gz`` suffix removed.
    """
    if out_prefix is None:
        out_prefix = strip_gz_suffix(bed_path)
    # Open the input first so that a missing input does not leave empty
    # output files behind.
    with gzip.open(bed_path, 'rb') as bedfile:
        outputs = {}
        try:
            for name in GROUP_ORDER:
                outputs[name] = gzip.open(
                    out_prefix + '_' + name + '.gz', 'wb')
            for line in bedfile:
                first_column = line.strip().split(b'\t', 1)[0]
                sample_id = first_column.split(b':', 1)[0]
                for name in GROUP_ORDER:
                    if sample_id in groups[name]:
                        outputs[name].write(line)
                        break
        finally:
            # Close every output that was opened, even after an error, so
            # the gzip trailers are flushed and no handle leaks.
            for handle in outputs.values():
                handle.close()


def main(argv=None):
    """Command-line entry point; *argv* defaults to ``sys.argv``."""
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        sys.exit('usage: %s <population_file> <bedfile.gz>' % argv[0])
    groups = read_population_groups(argv[1])
    fname = strip_gz_suffix(argv[2])
    print(fname)
    split_bed_by_population(argv[2], groups, fname)


if __name__ == '__main__':
    main()