Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 42 additions & 30 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,43 +9,39 @@
----------------------------------------------------------------------------------------
*/

nextflow.preview.output = true
nextflow.preview.types = true

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS
IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS / TYPES
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

include { SRA } from './workflows/sra'
include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_fetchngs_pipeline'
include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_fetchngs_pipeline'
include { softwareVersionsToYAML } from './subworkflows/nf-core/utils_nfcore_pipeline'
include { SOFTWARE_VERSIONS } from './subworkflows/nf-core/utils_nfcore_pipeline'
include { Sample } from './workflows/sra'

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
NAMED WORKFLOWS FOR PIPELINE
WORKFLOW INPUTS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

//
// WORKFLOW: Run main nf-core/fetchngs analysis pipeline depending on type of identifier provided
//
workflow NFCORE_FETCHNGS {
params {

take:
ids // channel: database ids read in from --input
// List of SRA/ENA/GEO/DDBJ identifiers to download their associated metadata and FastQ files
input: Path

main:
// Comma-separated list of ENA metadata fields to fetch before downloading data
ena_metadata_fields: String = ''

//
// WORKFLOW: Download FastQ files for SRA / ENA / GEO / DDBJ ids
//
SRA ( ids )
// Only download metadata for public data database ids and don't download the FastQ files
skip_fastq_download: Boolean = false

emit:
samples = SRA.out.samples
metadata = SRA.out.metadata
// dbGaP repository key
dbgap_key: Path?
}

/*
Expand All @@ -60,42 +56,57 @@ workflow {
//
// SUBWORKFLOW: Run initialisation tasks
//
PIPELINE_INITIALISATION (
ids = PIPELINE_INITIALISATION (
params.version,
params.validate_params,
params.monochrome_logs,
args,
params.outdir,
workflow.outputDir,
params.input,
params.ena_metadata_fields
)

//
// WORKFLOW: Run primary workflows for the pipeline
//
NFCORE_FETCHNGS (
PIPELINE_INITIALISATION.out.ids
sra = SRA (
channel.fromList(ids),
params
)

//
// SUBWORKFLOW: Collect software versions
//
versions = SOFTWARE_VERSIONS()

//
// SUBWORKFLOW: Run completion tasks
//
PIPELINE_COMPLETION (
params.email,
params.email_on_fail,
params.plaintext_email,
params.outdir,
workflow.outputDir,
params.monochrome_logs,
params.hook_url
)

publish:
samples = NFCORE_FETCHNGS.out.samples
metadata = NFCORE_FETCHNGS.out.metadata
versions = softwareVersionsToYAML()
samples = sra.samples
runinfo_ftp = sra.runinfo_ftp
versions = versions
}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
WORKFLOW OUTPUTS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

output {
samples {

// List of FASTQ samples with optional MD5 checksums
samples: Channel<Sample> {
path { sample ->
sample.fastq_1 >> 'fastq/'
sample.fastq_2 >> 'fastq/'
Expand All @@ -107,12 +118,13 @@ output {
}
}

metadata {
// List of download links for the given sample ids
runinfo_ftp: Channel<Path> {
path 'metadata'
}

versions {
path '.'
// Manifest of tool versions used by the pipeline for MultiQC
versions: Map<String,Map> {
index {
path 'nf_core_fetchngs_software_mqc_versions.yml'
}
Expand Down
48 changes: 32 additions & 16 deletions modules/local/aspera_cli/main.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
nextflow.preview.types = true

process ASPERA_CLI {
tag "$meta.id"
tag id
label 'process_medium'

conda "${moduleDir}/environment.yml"
Expand All @@ -8,29 +10,43 @@ process ASPERA_CLI {
'biocontainers/aspera-cli:4.14.0--hdfd78af_1' }"

input:
tuple val(meta), val(fastq)
val user
(
id: String,
single_end: Boolean,
fastq_aspera: String,
md5_1: String,
md5_2: String?
): Record
user: String

output:
tuple val(meta), path("*fastq.gz"), emit: fastq
tuple val(meta), path("*md5") , emit: md5
tuple val("${task.process}"), val('aspera_cli'), eval('ascli --version'), topic: versions
record(
id: id,
fastq_1: file('*_1.fastq.gz'),
fastq_2: file('*_2.fastq.gz', optional: true),
md5_1: file('*_1.fastq.gz.md5'),
md5_2: file('*_2.fastq.gz.md5', optional: true),
)

topic:
record(process: task.process, name: 'aspera_cli', version: eval('ascli --version')) >> 'versions'

script:
def args = task.ext.args ?: ''
def conda_prefix = ['singularity', 'apptainer'].contains(workflow.containerEngine) ? "export CONDA_PREFIX=/usr/local" : ""
if (meta.single_end) {
def fastq = fastq_aspera.tokenize(';')
if (single_end) {
"""
$conda_prefix

ascp \\
$args \\
-i \$CONDA_PREFIX/etc/aspera/aspera_bypass_dsa.pem \\
${user}@${fastq[0]} \\
${meta.id}.fastq.gz
${id}.fastq.gz

echo "${meta.md5_1} ${meta.id}.fastq.gz" > ${meta.id}.fastq.gz.md5
md5sum -c ${meta.id}.fastq.gz.md5
echo "${md5_1} ${id}.fastq.gz" > ${id}.fastq.gz.md5
md5sum -c ${id}.fastq.gz.md5
"""
} else {
"""
Expand All @@ -40,19 +56,19 @@ process ASPERA_CLI {
$args \\
-i \$CONDA_PREFIX/etc/aspera/aspera_bypass_dsa.pem \\
${user}@${fastq[0]} \\
${meta.id}_1.fastq.gz
${id}_1.fastq.gz

echo "${meta.md5_1} ${meta.id}_1.fastq.gz" > ${meta.id}_1.fastq.gz.md5
md5sum -c ${meta.id}_1.fastq.gz.md5
echo "${md5_1} ${id}_1.fastq.gz" > ${id}_1.fastq.gz.md5
md5sum -c ${id}_1.fastq.gz.md5

ascp \\
$args \\
-i \$CONDA_PREFIX/etc/aspera/aspera_bypass_dsa.pem \\
${user}@${fastq[1]} \\
${meta.id}_2.fastq.gz
${id}_2.fastq.gz

echo "${meta.md5_2} ${meta.id}_2.fastq.gz" > ${meta.id}_2.fastq.gz.md5
md5sum -c ${meta.id}_2.fastq.gz.md5
echo "${md5_2} ${id}_2.fastq.gz" > ${id}_2.fastq.gz.md5
md5sum -c ${id}_2.fastq.gz.md5
"""
}
}
52 changes: 34 additions & 18 deletions modules/local/sra_fastq_ftp/main.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@

nextflow.preview.types = true

process SRA_FASTQ_FTP {
tag "$meta.id"
tag id
label 'process_low'
label 'error_retry'

Expand All @@ -10,42 +12,56 @@ process SRA_FASTQ_FTP {
'biocontainers/wget:1.21.4' }"

input:
tuple val(meta), val(fastq)
(
id: String,
single_end: Boolean,
fastq_1: String,
fastq_2: String?,
md5_1: String,
md5_2: String?
): Record

output:
tuple val(meta), path("*fastq.gz"), emit: fastq
tuple val(meta), path("*md5") , emit: md5
tuple val("${task.process}"), val('wget'), eval("echo \$(wget --version | head -n 1 | sed 's/^GNU Wget //; s/ .*\$//')"), topic: versions
record(
id: id,
fastq_1: file('*_1.fastq.gz'),
fastq_2: file('*_2.fastq.gz'),
md5_1: file('*_1.fastq.gz.md5'),
md5_2: file('*_2.fastq.gz.md5'),
)

topic:
record(process: task.process, name: 'wget', version: eval("echo \$(wget --version | head -n 1 | sed 's/^GNU Wget //; s/ .*\$//')")) >> 'versions'

script:
def args = task.ext.args ?: ''
if (meta.single_end) {
if (single_end) {
"""
wget \\
$args \\
-O ${meta.id}.fastq.gz \\
${fastq[0]}
-O ${id}.fastq.gz \\
${fastq_1}

echo "${meta.md5_1} ${meta.id}.fastq.gz" > ${meta.id}.fastq.gz.md5
md5sum -c ${meta.id}.fastq.gz.md5
echo "${md5_1} ${id}.fastq.gz" > ${id}.fastq.gz.md5
md5sum -c ${id}.fastq.gz.md5
"""
} else {
"""
wget \\
$args \\
-O ${meta.id}_1.fastq.gz \\
${fastq[0]}
-O ${id}_1.fastq.gz \\
${fastq_1}

echo "${meta.md5_1} ${meta.id}_1.fastq.gz" > ${meta.id}_1.fastq.gz.md5
md5sum -c ${meta.id}_1.fastq.gz.md5
echo "${md5_1} ${id}_1.fastq.gz" > ${id}_1.fastq.gz.md5
md5sum -c ${id}_1.fastq.gz.md5

wget \\
$args \\
-O ${meta.id}_2.fastq.gz \\
${fastq[1]}
-O ${id}_2.fastq.gz \\
${fastq_2}

echo "${meta.md5_2} ${meta.id}_2.fastq.gz" > ${meta.id}_2.fastq.gz.md5
md5sum -c ${meta.id}_2.fastq.gz.md5
echo "${md5_2} ${id}_2.fastq.gz" > ${id}_2.fastq.gz.md5
md5sum -c ${id}_2.fastq.gz.md5
"""
}
}
14 changes: 9 additions & 5 deletions modules/local/sra_ids_to_runinfo/main.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@

nextflow.preview.types = true

process SRA_IDS_TO_RUNINFO {
tag "$id"
tag id
label 'error_retry'

conda "conda-forge::python=3.9.5"
Expand All @@ -9,12 +11,14 @@ process SRA_IDS_TO_RUNINFO {
'biocontainers/python:3.9--1' }"

input:
val id
val fields
id: String
fields: String

output:
path "*.tsv" , emit: tsv
tuple val("${task.process}"), val('python'), eval("python --version | sed 's/Python //g'"), topic: versions
file('*.runinfo.tsv')

topic:
record(process: task.process, name: 'python', version: eval("python --version | sed 's/Python //g'")) >> 'versions'

script:
def metadata_fields = fields ? "--ena_metadata_fields ${fields}" : ''
Expand Down
14 changes: 9 additions & 5 deletions modules/local/sra_runinfo_to_ftp/main.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@

nextflow.preview.types = true

process SRA_RUNINFO_TO_FTP {

conda "conda-forge::python=3.9.5"
Expand All @@ -7,16 +9,18 @@ process SRA_RUNINFO_TO_FTP {
'biocontainers/python:3.9--1' }"

input:
path runinfo
runinfo: Path

output:
path "*.tsv" , emit: tsv
tuple val("${task.process}"), val('python'), eval("python --version | sed 's/Python //g'"), topic: versions
file('*.runinfo_ftp.tsv')

topic:
record(process: task.process, name: 'python', version: eval("python --version | sed 's/Python //g'")) >> 'versions'

script:
"""
sra_runinfo_to_ftp.py \\
${runinfo.join(',')} \\
${runinfo.toString().tokenize(".")[0]}.runinfo_ftp.tsv
${runinfo} \\
${runinfo.baseName.tokenize(".")[0]}.runinfo_ftp.tsv
"""
}
12 changes: 7 additions & 5 deletions modules/nf-core/custom/sratoolsncbisettings/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading