-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmake-assem-images.py
More file actions
76 lines (56 loc) · 2.66 KB
/
make-assem-images.py
File metadata and controls
76 lines (56 loc) · 2.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import sys
import bactools3
import argparse
import os
# Default FASTA of library end sequences, used when --endseqs is not supplied.
# This is the path that was previously hard-coded into the script.
DEFAULT_LIBRARY_END_SEQS_FA = (
    '/home/jmkidd/links/kidd-lab/jmkidd-projects/dogs/nanopore/'
    'BAC-assembly-2020/data/CHORI-82.end-sequences.fa'
)

# Command-line interface.  Every option except --miropeats_s and --endseqs is
# required.
parser = argparse.ArgumentParser(description='make images of contig')
parser.add_argument('--outdir', type=str, required=True,
                    help='Path to output directory for this analysis')
parser.add_argument('--name', type=str, required=True,
                    help='name of project')
parser.add_argument('--longread', type=str, required=True,
                    help='file of long reads')
parser.add_argument('--contig', type=str, required=True,
                    help='fasta file of contig, should be single sequence')
parser.add_argument('--vector', type=str, required=True,
                    help='fasta file of vector should be single sequence')
parser.add_argument('--clonename', type=str, required=True,
                    help='clone name used to extract end sequences')
parser.add_argument('--miropeats_s', type=int, required=False, default=200,
                    help='miropeats -s option')
parser.add_argument('--endseqs', type=str, required=False,
                    default=DEFAULT_LIBRARY_END_SEQS_FA,
                    help='fasta file of library end sequences')
args = parser.parse_args()

# setup dictionary for holding information and passing to functions
myData = {
    'name': args.name,
    'longread': args.longread,
    'longreadtype': 'ont',  # for now, only option is oxford nanopore
    'contig': args.contig,
    'outDir': args.outdir,
    'vector': args.vector,
    'cloneName': args.clonename,
    # Overridable with --endseqs; defaults to the previously hard-coded file.
    'libraryEndSeqsFA': args.endseqs,
    'miropeats_s': args.miropeats_s,
}
# setup needed files
# An empty --outdir cannot be turned into a usable directory path; stop with a
# clear message instead of failing later on an index or mkdir error.
if not myData['outDir']:
    sys.exit('ERROR: --outdir must not be an empty string')

# Output file names are built by plain string concatenation, so make sure the
# directory path ends with a separator.
if not myData['outDir'].endswith('/'):
    myData['outDir'] += '/'

if not os.path.isdir(myData['outDir']):
    print('Output dir doest not exist, making it!', flush=True)
    # os.makedirs avoids building a shell command from the path, so nested
    # directories and paths containing spaces work.
    os.makedirs(myData['outDir'], exist_ok=True)

# Choose a log file name.  If the default log already exists and is non-empty,
# append .1, .2, ... until a name is found that is missing or empty, so earlier
# logs are not overwritten.
myData['logFileName'] = myData['outDir'] + 'make-assem-images.log'
print('logFileName', myData['logFileName'])
logi = 1
while (os.path.isfile(myData['logFileName'])
       and os.path.getsize(myData['logFileName']) > 0):
    myData['logFileName'] = myData['outDir'] + 'make-assem-images.log.' + str(logi)
    logi += 1
    print('logFileName', myData['logFileName'])

# The handle is stored in myData so later steps can use it; it is closed at
# the end of the script.
myData['logFile'] = open(myData['logFileName'], 'w')

bactools3.check_prog_paths(myData)
bactools3.write_initial_log(myData)
###########################################################################################
# Run the analysis steps in order.  The try/finally guarantees the log file is
# closed even when one of the steps raises or exits early.
try:
    # run miropeats self
    bactools3.run_miropeats_self(myData)

    # get coverage and long-read information
    # map to contig
    bactools3.map_to_contig_paf(myData)
    # NOTE(review): 500 and 100 are presumably window size and step size --
    # confirm against bactools3.make_windows_bed.
    bactools3.make_windows_bed(myData, 500, 100)
    bactools3.make_coverage_plot(myData)
    bactools3.make_gc_plot(myData)

    # show positions of the longest reads
    # NOTE(review): the value passed is 10, but the original comment said
    # "5 longest reads" -- confirm which count is intended.
    bactools3.make_coverage_plot_showlong(myData, 10)

    # map vector backbone and end sequences
    bactools3.make_vector_esp_plot(myData)
finally:
    myData['logFile'].close()