# 14_update-3way-align.py
#
# Re-runs the 3-way breakpoint alignment for every site listed in a miropeats
# results table, applying the optional per-site update keyword found in the
# table, and writes a new table with the updated fragment and breakpoint
# coordinates.
import sys
import genutils
import brkptalign
import os.path
from optparse import OptionParser
#############################################################################
USAGE = """python 14_update-3way-align.py
--in <input of miropeats parse with updates>
--out <output for new breakpoints parse>
--ref <reference genome fasta, must be indexed>
--miropeats_base <miropeats base dir>
"""

# Command line definition: one (flag, destination attribute, help text) row
# per option.  All four options are required.
parser = OptionParser(USAGE)
for optFlag, optDest, optHelp in [
    ('--in', 'inputFileName', 'file of initial miropeats input'),
    ('--out', 'outputFileName', 'file of new miropeats output'),
    ('--ref', 'refFasta', 'reference genome fasta'),
    ('--miropeats_base', 'mrDir', 'directory for temp files'),
]:
    parser.add_option(optFlag, dest=optDest, help=optHelp)

(options, args) = parser.parse_args()

# Options are checked in a fixed order; parser.error() reports the first one
# that was not supplied.
for optDest, complaint in [
    ('inputFileName', 'input miropeats result file not given'),
    ('outputFileName', 'output miropeats result file not given'),
    ('refFasta', 'refFasta file not given'),
    ('mrDir', 'miropeats dir not given'),
]:
    if getattr(options, optDest) is None:
        parser.error(complaint)
#############################################################################
# Guarantee that the miropeats base directory carries a trailing slash.
if options.mrDir[-1] != '/':
    options.mrDir = options.mrDir + '/'

# Short aliases for the command line values used by the rest of the script.
refGenomeFasta = options.refFasta
workingDirBase = options.mrDir
outResInputfile = options.inputFileName
outResInputfileOut = options.outputFileName

# Single-argument print call: same output under the Python 2 print statement
# this script was written for, and also valid Python 3.
print('Writing table of 3 way alignment updated results to %s' % outResInputfileOut)

# hardcoded in base scripts dir
# set scriptDir to location where scripts are installed
#scriptDir = 'scripts/'
scriptDir = '../'

regDelta = 2000
targetType = 'SINE/Alu' # for the sliding..

inFile = open(outResInputfile, 'r')
outTable = open(outResInputfileOut, 'w')

# Column names of the output table, written once as a tab-separated header
# row.  The spelling of each name is part of the file format and is kept
# exactly as it was.
header = '\t'.join([
    # site / contig description
    'siteID', 'chromosome', 'ContigName', 'ContigOreintation', 'ContigLen',
    'chromFragStart', 'chromFragEnd', 'leftChromEnd', 'leftContigEnd',
    'rightChromStart', 'rightContigStart',
    # fragment windows used for the 3-way alignment
    'genomeAlignFragStart', 'genomeAlignFragEnd', 'leftFragStart',
    'leftFragEnd', 'rightFragStart', 'rightFragEnd',
    # breakpoint coordinates and derived lengths
    'leftBPGFrag', 'leftBPChrom', 'leftBPContig', 'rightBPGFrag',
    'rightBPChrom', 'rightBPContig', 'TSDlen', 'insLen',
]) + '\n'
outTable.write(header)

numDid = 0
passed = True

# Optional debugging filter: when non-empty, only the listed site IDs are
# processed.
toCheck = []
#toCheck.append('chr1_115543668')
# Step size (in bases) by which the alignment windows are grown, shrunk or
# slid; every adjustment below is a multiple of it.
extendSize = 25

# Keyword-driven window adjustments.  Each row is
#   (accepted keywords, progress message or None, shifts)
# where shifts lists (data key, multiple of extendSize added to that value).
# Rows whose message is None stay silent, as before.
_FRAGMENT_SHIFT_TABLE = [
    (['extend-all', 'extend all', 'expand all'], None,
     [('contigRightFragAlignBegin', -2), ('contigRightFragAlignEnd', 2),
      ('contigLeftFragAlignBegin', -2), ('contigLeftFragAlignEnd', 2),
      ('genomeFragAlignBegin', -2), ('genomeFragAlignEnd', 2)]),
    (['extend right'], None,
     [('contigRightFragAlignBegin', -2), ('contigRightFragAlignEnd', 2)]),
    (['extend chrom', 'extend genome', 'enlarge chromosome',
      'expand genome', 'expand chrom'], 'Extend Chrom!',
     [('genomeFragAlignBegin', -2), ('genomeFragAlignEnd', 2)]),
    (['expand genome a lot'], 'Extend Chrom!',
     [('genomeFragAlignBegin', -6), ('genomeFragAlignEnd', 6)]),
    (['extend genome right'], 'Extend Chrom!',
     [('genomeFragAlignEnd', 2)]),
    (['extend genome left', 'expand genome left'], 'Extend Chrom!',
     [('genomeFragAlignBegin', -2)]),
    (['shorten left'], 'shorten left',
     [('contigLeftFragAlignEnd', -2)]),
    (['shorten right'], 'shorten right',
     [('contigRightFragAlignBegin', 2)]),
    (['extend right right'], 'extend right right',
     [('contigRightFragAlignEnd', 2)]),
    (['extend right left'], 'extend right left',
     [('contigRightFragAlignBegin', -2)]),
    # The next two rows used to print a copy-pasted 'extend right right';
    # they now report the keyword that is actually being applied.
    (['slide left left and slide genome right'],
     'slide left left and slide genome right',
     [('genomeFragAlignBegin', 2), ('genomeFragAlignEnd', 2),
      ('contigLeftFragAlignBegin', -2), ('contigLeftFragAlignEnd', -2)]),
    (['slide genome left'], 'slide genome left',
     [('genomeFragAlignBegin', -2), ('genomeFragAlignEnd', -2)]),
    (['extend left left'], 'extend left left',
     [('contigLeftFragAlignBegin', -2)]),
    # Used to print a copy-pasted 'extend left left'.
    (['extend left right'], 'extend left right',
     [('contigLeftFragAlignEnd', 2)]),
    (['extend left left and right right'], 'extend left left and right right',
     [('contigLeftFragAlignBegin', -2), ('contigRightFragAlignEnd', 2)]),
    (['slide right right'], None,
     [('contigRightFragAlignBegin', 2), ('contigRightFragAlignEnd', 2)]),
    (['slide genome right'], None,
     [('genomeFragAlignBegin', 2), ('genomeFragAlignEnd', 2)]),
]

# Flatten the table into keyword -> (message, shifts) for direct lookup.
_shiftsByKeyword = {}
for _keywords, _message, _shifts in _FRAGMENT_SHIFT_TABLE:
    for _keyword in _keywords:
        _shiftsByKeyword[_keyword] = (_message, _shifts)


def _regulate_and_realign(data):
    """Run regulate_align_frags on data, then repeat the 3-way alignment."""
    brkptalign.regulate_align_frags(data)
    brkptalign.repeat_3way_align_after_fragment_update(data, updatedBp=True)


# Main loop: one input row per site.  The try/finally guarantees both tables
# are closed even when the loop aborts (unknown keyword or an exception).
# All print calls take a single argument so that they produce the same text
# under the Python 2 print statement and under Python 3.
try:
    for line in inFile:
        line = line.rstrip().split('\t')
        siteID = line[0]
        if siteID == 'siteID':
            continue  # header row of the input table
        if toCheck and siteID not in toCheck:
            continue  # debugging filter is active and this site is not listed
        print(line)

        data = {}
        data['refGenomeFasta'] = refGenomeFasta
        brkptalign.populate_data_from_mrfile(data, line, regDelta, workingDirBase, addBreaks=True)
        brkptalign.run_rm(data, run=False)  # just to get file names, already ran

        # Columns 11-16 hold the six fragment window coordinates (same
        # positions as the corresponding names in the output header).
        data['genomeFragAlignBegin'] = int(line[11])
        data['genomeFragAlignEnd'] = int(line[12])
        data['contigLeftFragAlignBegin'] = int(line[13])
        data['contigLeftFragAlignEnd'] = int(line[14])
        data['contigRightFragAlignBegin'] = int(line[15])
        data['contigRightFragAlignEnd'] = int(line[16])
        brkptalign.repeat_3way_align_after_fragment_update(data, updatedBp=True)  # because things did not match
        brkptalign.populate_previous_align_results(data, line)
        print(siteID)

        # A row with exactly 29 columns carries manual update information in
        # columns 25-28; any other row is processed without changes.
        if len(line) == 29:
            print('UPDATES')
            leftContigMP = line[25]
            rightContigMP = line[26]
            genomeMP = line[27]
            changes = line[28]
        else:
            changes = 'NO_CHANGE'
        # Keywords are matched case-insensitively.  'double' is an alias for
        # "no change"; it is checked after lower-casing so that e.g. 'Double'
        # is accepted too instead of aborting the run.
        changes = changes.lower()
        if changes == 'double':
            changes = 'no_change'

        if changes in ('change', 'changes'):
            # Re-centre each window on a manually supplied position; empty
            # fields leave the corresponding window untouched.
            print('manual contig extraction updates')
            if leftContigMP != '':
                leftContigMP = int(leftContigMP)
                print('leftMP %s' % leftContigMP)
                data['contigLeftFragAlignBegin'] = leftContigMP - 3*extendSize
                data['contigLeftFragAlignEnd'] = leftContigMP + 3*extendSize
            if rightContigMP != '':
                rightContigMP = int(rightContigMP)
                print('rightMP %s' % rightContigMP)
                print('%s %s' % (data['contigRightFragAlignBegin'], data['contigRightFragAlignEnd']))
                data['contigRightFragAlignBegin'] = rightContigMP - 3*extendSize
                data['contigRightFragAlignEnd'] = rightContigMP + 3*extendSize
            if genomeMP != '':
                genomeMP = int(genomeMP)
                print('genomeMP %s' % genomeMP)
                # genomeMP is an offset into the extracted chromosome fragment
                # (1-based, judging by the "- 1") -- TODO confirm.
                data['genomeFragAlignBegin'] = data['chromFragStart'] - 1 + genomeMP - 6*extendSize
                data['genomeFragAlignEnd'] = data['chromFragStart'] - 1 + genomeMP + 6*extendSize
            _regulate_and_realign(data)
        elif changes in ('center chrom', 'recenter chrom'):
            print('center chrom to %s' % (data['leftBpChromCoords'],))
            data['genomeFragAlignBegin'] = data['leftBpChromCoords'] - 6*extendSize
            data['genomeFragAlignEnd'] = data['leftBpChromCoords'] + 6*extendSize
            _regulate_and_realign(data)
        elif changes == 'no_change':
            # Windows are unchanged, so only the alignment is repeated
            # (regulate_align_frags is deliberately not called here).
            print('NO CHANGE, still do update')
            brkptalign.repeat_3way_align_after_fragment_update(data, updatedBp=True)
        elif changes in _shiftsByKeyword:
            message, shifts = _shiftsByKeyword[changes]
            if message is not None:
                print(message)
            for key, steps in shifts:
                data[key] = data[key] + steps*extendSize
            _regulate_and_realign(data)
        elif changes == 'extend-genome-check':
            print('genome check and extend')
            if (data['genomeFragAlignEnd'] - data['genomeFragAlignBegin']) >= 1000:
                print('genomeMP %s' % (genomeMP,))
                # Re-centre on offset 2000 of the chromosome fragment
                # (presumably regDelta -- verify).
                data['genomeFragAlignBegin'] = data['chromFragStart'] - 1 + 2000 - 6*extendSize
                data['genomeFragAlignEnd'] = data['chromFragStart'] - 1 + 2000 + 6*extendSize
            # NOTE(review): indentation was lost in the copy this was restored
            # from; these calls are placed at branch level like in every other
            # branch -- confirm against the original script.
            _regulate_and_realign(data)
        else:
            # Unknown keyword: report it and stop.  Exit with a non-zero
            # status so that a calling pipeline notices the truncated output.
            print('??')
            print('%s %s' % (data['siteID'], changes))
            print('??')
            sys.exit(1)

        # ok, ready now to update the rest
        # setup output to print
        gTSDs = data['rightBpChromCoords']
        gTSDe = data['leftBpChromCoords']
        gTSDl = gTSDe - gTSDs + 1
        insStart = data['leftBpContigCoord'] + 1
        insEnd = data['rightBpContigCoord'] - 1
        insLen = insEnd - insStart + 1

        # Field order matches the header row written at the top of the table.
        nl = [
            data['siteID'], data['chromName'], data['contigName'], data['contigDir'],
            data['contigLen'], data['chromFragStart'], data['chromFragEnd'],
            data['leftChromFragEnd'], data['leftContigEnd'],
            data['rightChromFragStart'], data['rightContigStart'],
            data['genomeFragAlignBegin'], data['genomeFragAlignEnd'],
            data['contigLeftFragAlignBegin'], data['contigLeftFragAlignEnd'],
            data['contigRightFragAlignBegin'], data['contigRightFragAlignEnd'],
            data['leftBpGenomeFragCoords'], data['leftBpChromCoords'], data['leftBpContigCoord'],
            data['rightBpGenomeFragCoords'], data['rightBpChromCoords'], data['rightBpContigCoord'],
            gTSDl, insLen,
        ]

        # The derived values are stored before pickling so that they are part
        # of the saved dictionary.
        data['gTSDl'] = gTSDl
        data['insLen'] = insLen
        dataPickleFileName = data['alignOutDir'] + '/' + data['siteID'] + '.data.pickle'
        data['dataPickleFileName'] = dataPickleFileName
        brkptalign.write_pickle_dictionary(data, dataPickleFileName)

        outTable.write('\t'.join([str(j) for j in nl]) + '\n')

        # draw the combined align
        cmd = 'python ' + scriptDir + 'annotate-miropeats-3align.py '
        cmd += ' --pickle ' + dataPickleFileName
        print(cmd)
        genutils.runCMD(cmd)

        data['annotatedBPPS'] = data['miroOutPS'] + '.breakpoints.annotated.ps'
        data['psBPAnnotatedPDF'] = data['alignOutDir'] + '/' + data['siteID'] + '.3way.combined.annotated.pdf'
        # Drop a PDF left over from an earlier run before regenerating it.
        if os.path.isfile(data['psBPAnnotatedPDF']):
            os.remove(data['psBPAnnotatedPDF'])
        cmd = 'ps2pdf %s %s' % (data['annotatedBPPS'], data['psBPAnnotatedPDF'])
        print(cmd)
        genutils.runCMD(cmd)
        numDid += 1
finally:
    inFile.close()
    outTable.close()