-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathextract-zips.py
More file actions
executable file
·77 lines (54 loc) · 2.61 KB
/
extract-zips.py
File metadata and controls
executable file
·77 lines (54 loc) · 2.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/usr/bin/env python2
import os
import sys
import zipcodetools
from optparse import OptionParser
###############################################################################
# Usage text handed to OptionParser; optparse prints it for --help and as part
# of the message emitted by parser.error().
USAGE = """
python extract-zips.py --fq1 <fq.gz file for read 1> --fq2 <fq.gz file for read 2>
--outbase <base dir for output> --name <name for sample set>
--lefttarget <left target to align, default=ACGAAGACAAGATATCCTTGATCTG>
--righttarget <right target to align, default=GCCATCGATGTGGATCTACCACACA>
Will extract zipcode and info into outbase/name directory
Some parameters are hardcoded in. Assumes various programs are in user's path.
"""

parser = OptionParser(USAGE)
parser.add_option('--fq1', dest='fq1', help='fq.gz for read 1')
parser.add_option('--fq2', dest='fq2', help='fq.gz for read 2')
parser.add_option('--outbase', dest='outBase', help='base dir for output')
parser.add_option('--name', dest='name', help='name for this sample')
# The two flanking targets are optional; their defaults are repeated in USAGE,
# so keep both places in sync when changing them.
parser.add_option('--lefttarget', dest='leftTarget',
                  default='ACGAAGACAAGATATCCTTGATCTG',
                  help='region left of zip to align')
parser.add_option('--righttarget', dest='rightTarget',
                  default='GCCATCGATGTGGATCTACCACACA',
                  help='region right of zip to align')

(options, args) = parser.parse_args()

# These four options have no default and are mandatory. They are checked in a
# fixed order so the first missing one is the one reported; parser.error()
# prints the usage plus the message to stderr and exits, so nothing after the
# first failure runs.
for dest, message in (
        ('fq1', 'fq1 not given'),
        ('fq2', 'fq2 not given'),
        ('outBase', 'output base dir not given'),
        ('name', 'name not given')):
    if getattr(options, dest) is None:
        parser.error(message)
###############################################################################
print 'Starting extraction for name',options.name
myData = {}
myData['fq1'] = options.fq1
myData['fq2'] = options.fq2
myData['outBase'] = options.outBase
myData['name'] = options.name
myData['leftTarget'] = options.leftTarget
myData['rightTarget'] = options.rightTarget
myData['leftTarget'] = myData['leftTarget'].upper()
myData['rightTarget'] = myData['rightTarget'].upper()
myData['minLeftMatch'] = len(myData['leftTarget']) - 1
myData['minRightMatch'] = len(myData['rightTarget']) - 1
myData['minZipLen'] = 20 - 3
myData['maxZipLen'] = 20 + 3
zipcodetools.set_default_prog_paths(myData)
zipcodetools.setup_output_dir(myData)
zipcodetools.run_flash(myData)
zipcodetools.read_flash_stats(myData)
zipcodetools.get_zipcode_noindel(myData)
zipcodetools.count_extracted_zips(myData)
zipcodetools.print_extraction_stats(myData)
print 'Stats output to file:',myData['extractStatsFile']
print 'Set of zipcodes output to',myData['zipTable']