forked from darcyabjones/mitoflow
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpost.nf
More file actions
131 lines (102 loc) · 2.81 KB
/
post.nf
File metadata and controls
131 lines (102 loc) · 2.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/usr/bin/env nextflow
/*
vim: syntax=groovy
-*- mode: groovy;-*-
*/
def helpMessage() {
log.info"""
# mitoflow - postprocessing
Filter out mitochondrial contigs from an assembly.
## Usage
```bash
nextflow run -resume ./post.nf "*{_genomic.fasta,_mitochondrial.fasta,.bam}"
```
Note that because of the way the glob works, you need something unique
in the bracket expansion for the pattern to match properly.
E.G. Say you had the files `genome.fasta` and `genome_mitochondrial.fasta`
the pattern, `*{.fasta,_mitochondrial.fasta,.bam} doesn't work because
`genome_mitochondrial.fasta` will match twice.
It's annoying, but you might just have to rename your files. Sorry!
Requirements:
minimap2
bedtools
Python3 with Biopython installed
""".stripIndent()
}
if (params.help){
helpMessage()
exit 0
}
params.genomes = false
params.coverage = 0.95
params.percentile = 0.992
if ( params.genomes ) {
genomes = Channel.fromFilePairs(
params.genomes,
checkIfExists: true,
size: 3,
type: "file",
flat: true
)
} else {
log.info "Hey I need some assemblies and mitochondria to filter out."
exit 1
}
genomes.view().into {
genomes4Align;
genomes4Coverage;
}
// Identity filtering is just done as part of the minimap alignment, rather
// than filtering out the results.
// I'll need to add some options here to customise, but the preset value works
// pretty well.
process align {
label "minimap2"
tag { name }
input:
set val(name), file(bam), file(asm), file(mito_asm) from genomes4Align
output:
set val(name), file("matches.paf"), file(asm) into alignedGenomes
"""
minimap2 \
-x asm20 \
-o matches.paf \
${mito_asm} \
${asm}
"""
}
process coverage {
label "bedtools"
input:
set val(name), file(bam), file(asm), file(mito_asm) from genomes4Coverage
output:
set val(name), file("per_base_cov.tsv") into pbCoverages
"""
bedtools genomecov -ibam ${bam} -d > per_base_cov.tsv
"""
}
process filter {
label "python3"
tag { name }
publishDir "${params.outdir}/filtered_assemblies"
input:
set val(name), file("matches.paf"), file("genome.fasta"),
file("per_base_cov.tsv") from alignedGenomes
.join(pbCoverages, by: 0)
output:
set val(name),
file("${name}_matches.tsv"),
file("${name}_genomic.fasta"),
file("${name}_mitochondrial.fasta") into filteredGenomes
"""
filter_mitochondria.py \
-i matches.paf \
-f genome.fasta \
-p per_base_cov.tsv \
-g ${name}_genomic.fasta \
-m ${name}_mitochondrial.fasta \
-t ${name}_matches.tsv \
--coverage ${params.coverage} \
--percentile ${params.percentile}
"""
}