-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.nf
More file actions
143 lines (110 loc) · 4.48 KB
/
main.nf
File metadata and controls
143 lines (110 loc) · 4.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#!/usr/bin/env nextflow

// Short-circuit: when --help is passed, print usage and stop before any work runs.
if (params.help) { helpMsg(); exit 0 }
include { FILTER } from "./bin/subworkflows/filter.nf"
include { CLUSTER } from "./bin/subworkflows/cluster.nf"
include { STRUCTURE_PREDICITON } from "./bin/subworkflows/structure.nf"
include { SEARCH } from "./bin/subworkflows/search.nf"
include { INTERPROSCAN } from "./bin/subworkflows/interproscan.nf"
include { VALIDATE } from "./bin/subworkflows/validate.nf"
include { REPORT_NEW } from "./bin/subworkflows/output_new.nf"
include { GFF } from "./bin/subworkflows/writeGff.nf"
// Print the pipeline's usage/help text to stdout.
// Invoked from the top-level --help guard before the workflow executes.
def helpMsg() {
    println """
    Usage:
    The standard command to run the pipeline is as follows:
    nextflow run main.nf --foldseekdb=databaseDirectory/db/db_name --colabdb=databaseDirectory/colabfolddb --email=valid-E-mail-address
    Mandatory arguments:
    --foldseekdb Database used for Foldseek structural alignment (db_name is given during database creation)
    --email A valid E-mail is required for the InterProScan API
    --colabdb Directory of ColabFold search databases. Ignored when --wsl is set
    OR
    --wsl Sets environmental flags to run the pipeline on Windows Subsystem for Linux.
    Also runs structure prediction on public ColabFold servers, in case no colabdb is available [false]
    Optional arguments:
    --features Directory for feature GFF files [./phage_data/*/*.gff]
    --proteins Directory for protein FAA files [./phage_data/*/*.faa]
    --outDir Output directory [results]
    --maxProteinLength Maximum length of proteins [1500]
    --validdb Internal validation database. If not set internal validation will be skipped.
    --workDir Nextflow work directory [work]
    --cleanup Automatically delete work directory after successful pipeline run [true]
    --help Show this message
    """
}
// Main pipeline: import and filter proteins, cluster them, annotate cluster
// representatives (InterProScan), predict structures, run Foldseek search on
// unknown structures, optionally validate known ones, then write report + GFF.
workflow {
    ///
    /// IMPORT & FILTERING
    ///
    // Import protein files and limit length to default:1500 residues
    // NOTE(review): strict '<' means proteins of exactly maxProteinLength
    // residues are excluded, although --help describes it as a "maximum
    // length" — confirm this off-by-one is intended.
    Channel.fromPath( params.proteins, checkIfExists: true )
    | splitFasta( record: [id:true, desc:true, seqString:true] )
    | filter { record -> record.seqString.length() < params.maxProteinLength }
    | map{ it -> [id:it.id.replace("lcl|", ""), desc:it.desc, seqString:it.seqString]} // remove lcl| from start of id
    | set { ch_allProteins }
    FILTER( ch_allProteins )
    ///
    /// CLUSTERING
    ///
    CLUSTER( FILTER.out.allProteins )
    ///
    /// INTERPROSCAN
    ///
    // Annotate only the cluster representatives, not every member.
    INTERPROSCAN( CLUSTER.out.splitClusterReps )
    ///
    /// STRUCTURE PREDICTION
    ///
    // NOTE(review): 'PREDICITON' is misspelled, but it matches the include
    // alias at the top of the file; renaming requires changing both lines.
    STRUCTURE_PREDICITON( CLUSTER.out.splitClusterReps )
    // Test setup for when structures were already predicted
    // Channel.fromPath(["./resultsT4/colabfold/*_rank_001*.pdb", "./resultsT4/colabfold/*_rank_001*.json"])
    // | map { it ->
    // tuple((it =~ /colabfold\/(.*?)_(unrelaxed|relaxed|scores)/)[0][1], it)
    // }
    // | groupTuple()
    // | map{ id, paths ->
    // [id:id,
    // pdb: paths.find{ it -> it.toString().endsWith(".pdb") },
    // json: paths.find{ it -> it.toString().endsWith(".json")}] }
    // | set{ temp }
    ///
    /// FOLDSEEK SEARCH
    ///
    // Split predicted structures by whether their name matches "_known" or
    // "_unknown"; only the unknown branch is sent to Foldseek SEARCH.
    STRUCTURE_PREDICITON.out
    // temp
    | branch { it ->
        known: it =~ /.*_known.*/
        unknown: it =~ /.*_unknown.*/
    }
    | set { structures }
    SEARCH( structures.unknown )
    ///
    /// VALIDATION & OUTPUT
    ///
    // Internal validation runs only when --validdb was supplied (see --help).
    if( params.validdb ) {
        VALIDATE( structures.known )
    }
    // Reporting runs regardless of whether validation was performed.
    REPORT_NEW (
        SEARCH.out,
        CLUSTER.out.clusterMembers,
        FILTER.out.proteinDescriptions,
        CLUSTER.out.allClusterReps,
        INTERPROSCAN.out
    )
    GFF ( REPORT_NEW.out )
}
// Completion handler: optionally clean up the work directory and print a
// run summary. The work directory is deleted only when the run succeeded
// AND cleanup was not disabled — deleting on failure would destroy the
// intermediate files needed for `-resume`, and the --help text promises
// deletion only "after successful pipeline run".
workflow.onComplete {
    // --cleanup documents a default of [true], so treat an unset param as true.
    def cleanupRequested = params.cleanup == null ? true : params.cleanup
    def cleaned = false
    if (workflow.success && cleanupRequested) {
        workflow.workDir.deleteDir()
        cleaned = true
    }
    def message = """
    Pipeline execution summary
    ---------------------------
    Completed at: ${workflow.complete}
    Duration : ${workflow.duration}
    Success : ${workflow.success}
    workDir : ${cleaned ? 'Auto delete' : workflow.workDir}
    outDir : ${params.outDir}
    exit status : ${workflow.exitStatus}
    """
    println message
}