-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcollapse-insertion-map.py
More file actions
executable file
·107 lines (75 loc) · 2.69 KB
/
collapse-insertion-map.py
File metadata and controls
executable file
·107 lines (75 loc) · 2.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#!/usr/bin/env python2
import sys
from optparse import OptionParser
###############################################################################
USAGE = """
python collapse-insertion-map.py --alignparsefam <txt file from add-family-to-align-parse.py>
Will output updated set to file in same dir.
"""
parser = OptionParser(USAGE)
parser.add_option('--alignparsefam',dest='alignParseFam', help = 'align parse file with family info')
(options, args) = parser.parse_args()
if options.alignParseFam is None:
parser.error('alignParseFam not given')
###############################################################################
def check_to_skip(line):
# see if it is one that we want to skip
# no fam, no chrom, mismatch, etc
if line[0] == 'NotAssigned':
return True
if line[3] == 'HIV-1_provirs':
return True
if line[7] == 'HIV-1_provirs':
return True
if line[7] == 'HIV-1_provirs':
return True
if line[5] != line[9]:
return True #diff dirs...
if line[3] != line[7] != line[9]:
return True #diff chroms
if int(line[6]) <= 10: # skip if unclear ins point
return True
###############################################################################
print options.alignParseFam
ofn = options.alignParseFam + '.collapsed'
print 'puttign output to',ofn
insData = {}
inFile = open(options.alignParseFam,'r')
for line in inFile:
if line[0] == '#':
continue
line = line.rstrip()
line = line.split()
toSkip = check_to_skip(line)
if toSkip is True:
continue
zc = line[0]
insC = line[3]
insBP = line[4]
insStrand = line[5]
shearC = line[7]
shearBP = line[8]
if zc not in insData:
insData[zc] = {}
if (insC,insBP,insStrand) not in insData[zc]:
insData[zc][(insC,insBP,insStrand)] = {}
insData[zc][(insC,insBP,insStrand)]['reads'] = 0
insData[zc][(insC,insBP,insStrand)]['reads'] += 1
if (shearC,shearBP) not in insData[zc][(insC,insBP,insStrand)]:
insData[zc][(insC,insBP,insStrand)][(shearC,shearBP)] = 0
insData[zc][(insC,insBP,insStrand)][(shearC,shearBP)] += 1
inFile.close()
zcList = insData.keys()
zcList.sort()
outFile = open(ofn,'w')
nl = ['#zipFamily','insChrom','insBP','insStrand','numShearPoints','totReads']
nl = '\t'.join(nl) + '\n'
outFile.write(nl)
for zc in zcList:
insPointList = insData[zc].keys()
for insPoint in insPointList:
nl = [zc,insPoint[0],insPoint[1],insPoint[2],len(insData[zc][insPoint]) -1,insData[zc][insPoint]['reads']]
nl = [str(j) for j in nl]
nl = '\t'.join(nl) + '\n'
outFile.write(nl)
outFile.close()