spectrafuse/nextflow.config at main · bigbio/spectrafuse · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    bigbio/spectrafuse Nextflow config file
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Default config options for all compute environments
----------------------------------------------------------------------------------------
*/

// Global default params, used in configs
// Pipeline parameters and their defaults. Every value can be overridden on the command
// line (--<name>), in a params file, or by a config loaded later.
params {
    // Input options
    parquet_dir = null

    // Optional path to an existing cluster DB. When set, the new QPX data is
    // clustered *together with* representative spectra from the existing DB,
    // and results are merged in (preserving cluster IDs where reps appear).
    existing_cluster_db = null
    // Default species/instrument to use while grouping by QPX metadata is not yet implemented
    default_species = null
    default_instrument = null

    // MaRaCluster parameters
    maracluster_pvalue_threshold = -10.0 // log10(p-value) threshold for MaRaCluster (default -10.0 = p-value 1e-10)
    maracluster_verbose = false // Set to true to enable verbose output from MaRaCluster
    maracluster_files_list_folder = null // Folder containing a list of files to process with MaRaCluster
    maracluster_output_folder = './' // Folder to which MaRaCluster results are written
    maracluster_precursor_tolerance = 20.0 // Precursor tolerance for MaRaCluster
    cluster_threshold = 30 // p-value threshold used in MaRaCluster output file naming (e.g., *_p30.tsv)

    // Partitioning parameters
    skip_instrument = false // Set to true to cluster all instruments together (no instrument partitioning)

    // Precursor m/z windowing parameters (parallelizes MaRaCluster within each charge)
    mz_window_size = 300    // Width of each precursor m/z window in Da
    mz_window_overlap = 1.0 // Overlap between adjacent windows in Da (safety margin)

    // Incremental mode parameters (dat-based, no MGF)
    // (no dedicated parameters are defined under this heading at present)

    // MSP format generation parameters (consensus spectrum parameters)
    strategytype = 'best' // Strategy type for consensus spectrum generation
    sim = 'dot' // Similarity metric for consensus spectrum
    fragment_mz_tolerance = 0.02 // Fragment m/z tolerance for consensus spectrum
    min_mz = 100 // Minimum m/z value
    max_mz = 2000 // Maximum m/z value
    bin_size = 0.02 // Bin size for m/z binning
    peak_quorum = 0.25 // Peak quorum threshold
    edge_case_threshold = 0.5 // Edge case threshold
    diff_thresh = 0.01 // Difference threshold
    dyn_range = 1000 // Dynamic range
    min_fraction = 0.5 // Minimum fraction
    pepmass = 'lower_median' // Peptide mass calculation method
    msms_avg = 'weighted' // MS/MS averaging method

    // Boilerplate options
    outdir                     = './results'
    publish_dir_mode           = 'copy'
    email                      = null
    email_on_fail              = null
    plaintext_email            = false
    monochrome_logs            = false
    hook_url                   = System.getenv('HOOK_URL') // Taken from the HOOK_URL environment variable (null when unset)
    help                       = false
    help_full                  = false
    show_hidden                = false
    version                    = false
    // Timestamp captured when the config is evaluated; used to give report files unique names
    trace_report_suffix        = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss')

    // Config options
    config_profile_name        = null
    config_profile_description = null

    custom_config_version      = 'master'
    // Base location of the nf-core institutional configs, derived from custom_config_version
    custom_config_base         = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
    config_profile_contact     = null
    config_profile_url         = null

    // Schema validation default options
    validate_params            = true

    // Resource limits (upper bounds applied by check_max)
    max_memory = 128.GB
    max_cpus = 16
    max_time = 240.h
}

// Function to ensure that resource requirements don't go beyond a maximum limit
// Must be defined before base.config is included
//
// obj  - the requested resource value (memory, duration or cpu count)
// type - one of 'memory', 'time' or 'cpus'; selects which params.max_* limit applies
//
// Returns obj when it is within the limit, otherwise the corresponding params.max_* value.
// If the limit cannot be converted or compared (any exception), an error line is printed
// and obj is returned unchanged. For any other `type` no branch matches and the function
// returns null.
def check_max(obj, type) {
    if (type == 'memory') {
        try {
            // compareTo only guarantees the sign of its result, so test "> 0" rather than "== 1"
            if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) > 0)
                return params.max_memory as nextflow.util.MemoryUnit
            else
                return obj
        } catch (all) {
            println "   ### ERROR ###   Max memory '${params.max_memory}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'time') {
        try {
            // Same sign-based comparison as for memory
            if (obj.compareTo(params.max_time as nextflow.util.Duration) > 0)
                return params.max_time as nextflow.util.Duration
            else
                return obj
        } catch (all) {
            println "   ### ERROR ###   Max time '${params.max_time}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'cpus') {
        try {
            // Cap the request at params.max_cpus
            return Math.min( obj, params.max_cpus as int )
        } catch (all) {
            println "   ### ERROR ###   Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
            return obj
        }
    }
}

// Load the base configuration (conf/base.config) unconditionally, i.e. for every run
// regardless of which profiles are selected.
includeConfig 'conf/base.config'

// Named configuration profiles (selected with -profile). Each child scope holds the
// settings listed inside it.
profiles {
    // Debugging aid: dump task hashes, print the host name before each task script,
    // disable cleanup, and turn process-name validation back on (it is disabled by
    // default at the top level of this file).
    debug {
        dumpHashes              = true
        process.beforeScript    = 'echo $HOSTNAME'
        cleanup                 = false
        nextflow.enable.configProcessNamesValidation = true
    }
    // Docker: enables Docker and switches off conda and every other container engine.
    docker {
        docker.enabled          = true
        conda.enabled           = false
        singularity.enabled     = false
        podman.enabled          = false
        shifter.enabled         = false
        charliecloud.enabled    = false
        apptainer.enabled       = false
        // Pass the invoking user's uid:gid to the container via -u
        docker.runOptions       = '-u $(id -u):$(id -g)'
    }
    // arm64: sets process.arch and enables Wave (freeze on, 'conda,container' strategy)
    // with OCI auto-pull for Apptainer and Singularity.
    arm64 {
        process.arch            = 'arm64'
        apptainer.ociAutoPull   = true
        singularity.ociAutoPull = true
        wave.enabled            = true
        wave.freeze             = true
        wave.strategy           = 'conda,container'
    }
    // emulate_amd64: Docker run options with --platform=linux/amd64 added to the uid:gid mapping.
    emulate_amd64 {
        docker.runOptions       = '-u $(id -u):$(id -g) --platform=linux/amd64'
    }
    // Singularity: enables Singularity with auto-mounts; conda and other engines are off.
    singularity {
        singularity.enabled     = true
        singularity.autoMounts  = true
        // Large images (~1+ GB) can exceed the default 20-minute pull timeout.
        singularity.pullTimeout = '3 h'
        conda.enabled           = false
        docker.enabled          = false
        podman.enabled          = false
        shifter.enabled         = false
        charliecloud.enabled    = false
        apptainer.enabled       = false
    }
    // Podman: enables Podman; conda and other engines are off.
    podman {
        podman.enabled          = true
        conda.enabled           = false
        docker.enabled          = false
        singularity.enabled     = false
        shifter.enabled         = false
        charliecloud.enabled    = false
        apptainer.enabled       = false
    }
    // Shifter: enables Shifter; conda and other engines are off.
    shifter {
        shifter.enabled         = true
        conda.enabled           = false
        docker.enabled          = false
        singularity.enabled    = false
        podman.enabled          = false
        charliecloud.enabled    = false
        apptainer.enabled       = false
    }
    // Charliecloud: enables Charliecloud; conda and other engines are off.
    charliecloud {
        charliecloud.enabled    = true
        conda.enabled           = false
        docker.enabled          = false
        singularity.enabled     = false
        podman.enabled          = false
        shifter.enabled         = false
        apptainer.enabled       = false
    }
    // Apptainer: enables Apptainer with auto-mounts; conda and other engines are off.
    apptainer {
        apptainer.enabled       = true
        apptainer.autoMounts    = true
        conda.enabled           = false
        docker.enabled          = false
        singularity.enabled     = false
        podman.enabled          = false
        shifter.enabled         = false
        charliecloud.enabled    = false
    }
    // Wave: the same Wave / OCI auto-pull settings as the arm64 profile, without process.arch.
    wave {
        apptainer.ociAutoPull   = true
        singularity.ociAutoPull = true
        wave.enabled            = true
        wave.freeze             = true
        wave.strategy           = 'conda,container'
    }
    // GPU: adds `--gpus all` to the Docker run options and `--nv` for Apptainer/Singularity.
    gpu {
        docker.runOptions       = '-u $(id -u):$(id -g) --gpus all'
        apptainer.runOptions    = '--nv'
        singularity.runOptions  = '--nv'
    }
    // EBI Codon SLURM cluster profile; its settings live in conf/codon_slurm.config
    codon_slurm {
        includeConfig 'conf/codon_slurm.config'
    }
    // Test profile; its settings live in conf/tests/test.config
    test { includeConfig 'conf/tests/test.config' }
}

// Load nf-core custom profiles from different institutions.
// "${params.custom_config_base}/nfcore_custom.config" is included when
// params.custom_config_base is set AND either the NXF_OFFLINE environment variable is
// not set OR the base does not start with 'http' (i.e. it is a local path rather than a URL).
// In every other case "/dev/null" is included instead, which contributes no settings.
includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !params.custom_config_base.startsWith('http')) ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null"

// Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile
// Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled
// Set to your registry if you have a mirror of containers
apptainer.registry    = 'quay.io'
docker.registry       = 'quay.io'
podman.registry       = 'quay.io'
singularity.registry  = 'quay.io'
charliecloud.registry = 'quay.io'

// Export these variables to prevent local Python/R libraries from conflicting with those in the container
env {
    PYTHONNOUSERSITE = 1
    // Point R's user profile/environ files at fixed root-level paths
    // (presumably so the invoking user's own ~/.Rprofile and ~/.Renviron are not picked up — confirm)
    R_PROFILE_USER   = "/.Rprofile"
    R_ENVIRON_USER   = "/.Renviron"
}

// Set bash options: the command line used to launch every process script
process.shell = [
    "bash",
    "-C",         // No clobber - prevent output redirection from overwriting files.
    "-e",         // Exit if a tool returns a non-zero status/exit code
    "-u",         // Treat unset variables and parameters as an error
    "-o",         // Enable the pipefail option for bash
    "pipefail"    // Returns the status of the last command to exit with a non-zero status, or zero if all commands succeed
]

// Disable process selector warnings by default. Use debug profile to enable warnings.
nextflow.enable.configProcessNamesValidation = false

// Execution reports (timeline, report, trace, DAG). All four are enabled and written to
// ${params.outdir}/pipeline_info, with params.trace_report_suffix embedded in each file name.
timeline {
    enabled = true
    file    = "${params.outdir}/pipeline_info/execution_timeline_${params.trace_report_suffix}.html"
}
report {
    enabled = true
    file    = "${params.outdir}/pipeline_info/execution_report_${params.trace_report_suffix}.html"
}
// The trace is the only one of the four written as plain text (.txt) rather than HTML
trace {
    enabled = true
    file    = "${params.outdir}/pipeline_info/execution_trace_${params.trace_report_suffix}.txt"
}
dag {
    enabled = true
    file    = "${params.outdir}/pipeline_info/pipeline_dag_${params.trace_report_suffix}.html"
}

// Pipeline manifest: project metadata (name, home page, entry script, version requirements).
manifest {
    name            = 'bigbio/spectrafuse'
    homePage        = 'https://github.com/bigbio/spectrafuse'
    description     = """Incremental spectral clustering pipeline from quantms data"""
    mainScript      = 'main.nf'
    // NOTE(review): this file was viewed on a branch named 'main'; confirm that 'master'
    // really is the repository's default branch, otherwise this should read 'main'.
    defaultBranch   = 'master'
    // Minimum Nextflow version; per the Nextflow manifest docs the leading '!' makes
    // the requirement mandatory (the run stops if it is not met).
    nextflowVersion = '!>=23.04.0'
    version         = '0.1.0'
}