diff --git a/bin/run_sequenza.R b/bin/run_sequenza.R index d767c20..6c13dcb 100755 --- a/bin/run_sequenza.R +++ b/bin/run_sequenza.R @@ -48,19 +48,19 @@ CP.example <- sequenza.fit(seqzdata, mc.cores = n_cores) ## Sequenza.extract seems to fail if too few mutations num_mutations <- unlist(lapply(seqzdata$mutations, nrow)) -chrom_list1 <- names(num_mutations)[num_mutations > 3] +chrom_list <- names(num_mutations)[num_mutations > 3] ## Also fails if segments <2 -num_segments <- unlist(lapply(seqzdata$segments, nrow)) -chrom_list2 <- names(num_mutations)[num_segments > 1] +#num_segments <- unlist(lapply(seqzdata$segments, nrow)) +#chrom_list2 <- names(num_mutations)[num_segments > 1] +#chrom_list <- intersect(chrom_list1,chrom_list2) -chrom_list <- intersect(chrom_list1,chrom_list2) not_included <- setdiff(names(num_mutations), chrom_list) print("Printing results...") if (length(not_included) > 0) { print("Excluding these chromosomes because of too few mutations and/or segments...") print(not_included) } -sequenza.results(sequenza.extract = seqzdata,cp.table = CP.example, sample.id = sampleid, out.dir=out_dir, chromosome.list=chrom_list) +sequenza.results(sequenza.extract = seqzdata,cp.table = CP.example, sample.id = sampleid, out.dir=out_dir) date() print("Done") diff --git a/conf/base.config b/conf/base.config index fb34ad8..902611f 100644 --- a/conf/base.config +++ b/conf/base.config @@ -83,7 +83,7 @@ process { memory = { check_max( 96.GB * task.attempt, 'memory' ) } time = { check_max( 72.h * task.attempt, 'time' ) } } - withName:'qualimap' { + withName:'qualimap_bamqc' { cpus = { check_max( 8 * task.attempt, 'cpus' ) } memory = { check_max( 96.GB * task.attempt, 'memory' ) } time = { check_max( 24.h * task.attempt, 'time' ) } diff --git a/conf/biowulf.config b/conf/biowulf.config index 9e93c91..97c5c68 100644 --- a/conf/biowulf.config +++ b/conf/biowulf.config @@ -60,7 +60,7 @@ process { cpus = { check_max( 4 * task.attempt, 'cpus' ) } memory = { check_max( 24.GB * task.attempt, 'memory' ) } time = { check_max( 120.h * task.attempt, 'time' ) } - clusterOptions = ' --gres=lscratch:300 ' + clusterOptions = ' --gres=lscratch:500 ' } } diff --git a/conf/genomes.config b/conf/genomes.config index 8a807fd..94e2ef6 100644 --- a/conf/genomes.config +++ b/conf/genomes.config @@ -28,8 +28,9 @@ params { annotsvgenome = "GRCh38" octopus_sforest= "--somatic-forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/somatic.v0.7.4.forest" octopus_gforest= "--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest" - SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz" - chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY','chrM'] + SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38.gc200Base.wig.gz" + SEQUENZAGC_EXOME = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38.gc50Base.wig.gz" + chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY'] //HMFTOOLS GENOMEVER = "38" HMFGENOME = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/bwamem2/GRCh38.d1.vd1.fa" @@ -72,7 +73,8 @@ params { dbsnp = "/fdb/GATK_resource_bundle/hg19-2.8/dbsnp_138.hg19.vcf.gz" germline_resource = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/GATKbundle/af-only-gnomad.raw.sites.liftover.hg19.vcf.gz" gnomad = '--germline-resource /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/GATKbundle/af-only-gnomad.raw.sites.liftover.hg19.vcf.gz' - pon = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/GATKbundle/hg19.liftGRCh37.noCOSMIC_ClinVar.pon.vcf.gz" + PON = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/GATKbundle/hg19.liftGRCh37.noCOSMIC_ClinVar.pon.vcf.gz" + tonly_PON = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/GATKbundle/hg19.liftGRCh37.noCOSMIC_ClinVar.pon.vcf.gz" KRAKENBACDB = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/kraken/20180907_standard_kraken2" snpeff_genome = "GRCh37.75" snpeff_config = "/usr/local/apps/snpEff/4.3t/snpEff.config" @@ -85,8 +87,9 @@ params { annotsvgenome = "GRCh37" octopus_sforest= "" //NO hg19 somaticforest"--somatic-forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/somatic.v0.7.4.forest" octopus_gforest= "" //no hg19 gforest"--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest" - SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz" - chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY','chrM'] + SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/SEQUENZA/hg19.gc200Base.wig.gz" + SEQUENZAGC_EXOME = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/SEQUENZA/hg19.gc50Base.wig.gz" + chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY'] //HMFTOOLS GENOMEVER = "37" HMFGENOME = "/data/CCBR_Pipeliner/db/PipeDB/lib/hs37d5.fa" @@ -142,14 +145,15 @@ params { annotsvgenome = "mm10" octopus_sforest = "" octopus_gforest = "" - SEQUENZAGC = '/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/SEQUENZA/mm10.gc50Base.wig.gz' + SEQUENZAGC = '/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/SEQUENZA/mm10.gc200Base.wig.gz' + SEQUENZAGC_EXOME = '/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/SEQUENZA/mm10.gc50Base.wig.gz' FREEC { FREECLENGTHS = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/FREEC/mm10.fa.fai" FREECCHROMS = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/FREEC/Chromosomes" FREECPILEUP = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/FREEC/mm10_dbSNP137.ucsc.freec.bed" FREECSNPS= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/FREEC/mm10_dbSNP137.ucsc.freec.txt" } - chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chrX','chrY','chrM'] + chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chrX','chrY'] //CNVKIT REFFLAT = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/mm10/cnvkit/refFlat.txt" ACCESS = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/mm10/cnvkit/access-10kb.mm10.bed" @@ -183,8 +187,9 @@ params { annotsvgenome = "GRCh38" octopus_sforest= "--somatic-forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/somatic.v0.7.4.forest" octopus_gforest= "--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest" - SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz" - chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY','chrM'] + SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38.gc200Base.wig.gz" + SEQUENZAGC_EXOME = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38.gc50Base.wig.gz" + chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY'] //HMFTOOLS GENOMEVER = "38" HMFGENOME = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/bwamem2/GRCh38.d1.vd1.fa" @@ -237,8 +242,9 @@ params { annotsvgenome = "GRCh38" octopus_sforest= "--somatic-forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/somatic.v0.7.4.forest" octopus_gforest= "--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest" - SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz" - chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY','chrM'] + SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38.gc200Base.wig.gz" + SEQUENZAGC_EXOME = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38.gc50Base.wig.gz" + chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY'] //HMFTOOLS GENOMEVER = "38" HMFGENOME = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/genome_noalt/bwamem2/GCA_000001405.15_GRCh38_no_alt_analysis_set.fasta" diff --git a/modules/local/bwamem/bwamem2.nf b/modules/local/bwamem/bwamem2.nf index 8f26939..e9e94c8 100644 --- a/modules/local/bwamem/bwamem2.nf +++ b/modules/local/bwamem/bwamem2.nf @@ -11,7 +11,6 @@ process bwamem2 { else if (task.attempt == 3) return '200 GB' } - input: tuple val(samplename), path("${samplename}.R1.trimmed.fastq.gz"), diff --git a/modules/local/deepvariant.nf b/modules/local/deepvariant.nf index d584116..f4d7a5e 100644 --- a/modules/local/deepvariant.nf +++ b/modules/local/deepvariant.nf @@ -95,6 +95,7 @@ process deepvariant_step3 { """ postprocess_variants \ --ref $GENOMEREF \ + --sample_name ${samplename} \ --infile outdv/${samplename}_call_variants_output.tfrecord.gz \ --outfile ${samplename}_${bed}.vcf.gz \ --gvcf_outfile ${samplename}_${bed}.gvcf.gz \ @@ -151,19 +152,19 @@ process glnexus { """ glnexus_cli --config DeepVariant_unfiltered \ - *.gvcf.gz --threads 8 > germline.v.bcf + *.gvcf.gz --threads $task.cpus > germline.v.bcf bcftools norm \ -m - \ -Oz \ - --threads 8 \ + --threads $task.cpus \ -f $GENOMEREF \ -o germline.norm.vcf.gz \ germline.v.bcf bcftools index \ -f -t \ - --threads 8 \ + --threads $task.cpus \ germline.norm.vcf.gz """ @@ -198,7 +199,7 @@ process deepvariant_combined { --reads=${bam} \ --output_gvcf=${samplename}.gvcf.gz \ --output_vcf=${samplename}.vcf.gz \ - --num_shards=16 + --num_shards=$task.cpus """ diff --git a/modules/local/gridss.nf b/modules/local/gridss.nf index ab91c03..44a24b0 100644 --- a/modules/local/gridss.nf +++ b/modules/local/gridss.nf @@ -82,13 +82,10 @@ process gridss_tonly { output: tuple val(tumorname), - path("${tumorname}.vcf.gz"), - path("${tumorname}.vcf.gz.tbi"), + path("${tumorname}.vcf.gz"), path("${tumorname}.vcf.gz.tbi"), path("${tumorname}.vcf.gz.assembly.bam"), - path("${tumorname}.gripss.vcf.gz"), - path("${tumorname}.gripss.vcf.gz.tbi"), - path("${tumorname}.gripss.filtered.vcf.gz"), - path("${tumorname}.gripss.filtered.vcf.gz.tbi") + path("${tumorname}.gripss.vcf.gz"), path("${tumorname}.gripss.vcf.gz.tbi"), + path("${tumorname}.gripss.filtered.vcf.gz"), path("${tumorname}.gripss.filtered.vcf.gz.tbi") script: """ diff --git a/modules/local/purple.nf b/modules/local/purple.nf index 2a58dda..c89dc39 100644 --- a/modules/local/purple.nf +++ b/modules/local/purple.nf @@ -16,8 +16,9 @@ GERMLINEHOTSPOTS = file(params.genomes[params.genome].GERMLINEHOTSPOTS) process amber_tonly { container = "${params.containers.logan}" - label 'process_medium' + errorStrategy 'ignore' + input: tuple val(tumorname), path(tumor), path(tumorbai) @@ -49,8 +50,8 @@ process amber_tonly { process amber_tn { container = "${params.containers.logan}" - label 'process_medium' + errorStrategy 'ignore' input: tuple val(tumorname), path(tumor), path(tumorbai), @@ -85,6 +86,7 @@ process amber_tn { process cobalt_tonly { container = "${params.containers.logan}" label 'process_medium' + errorStrategy 'ignore' input: tuple val(tumorname), path(tumor), path(tumorbai) @@ -116,6 +118,7 @@ process cobalt_tonly { process cobalt_tn { container = "${params.containers.logan}" label 'process_medium' + errorStrategy 'ignore' input: tuple val(tumorname), path(tumor), path(tumorbai), @@ -149,6 +152,7 @@ process cobalt_tn { process purple { container = "${params.containers.logan}" label 'process_medium' + errorStrategy 'ignore' input: tuple val(id), val(tumorname), val(normalname), @@ -190,6 +194,7 @@ process purple { process purple_novc { container = "${params.containers.logan}" label 'process_medium' + errorStrategy 'ignore' input: tuple val(id), val(tumorname), val(normalname), @@ -229,6 +234,7 @@ process purple_novc { process purple_tonly { container = "${params.containers.logan}" label 'process_medium' + errorStrategy 'ignore' input: tuple val(tumorname), @@ -251,7 +257,7 @@ process purple_tonly { $ENSEMBLCACHE \ -somatic_vcf ${somaticvcf} \ -driver_gene_panel $DRIVERS \ - -somatic_hotspots $HOTSPOTS \ + -somatic_hotspots $SOMATICHOTSPOTS \ -threads $task.cpus \ -output_dir ${tumorname} """ @@ -269,7 +275,8 @@ process purple_tonly { process purple_tonly_novc { container = "${params.containers.logan}" label 'process_medium' - + errorStrategy 'ignore' + input: tuple val(tumorname), val(normalname), path(cobaltin), path(amberin) diff --git a/modules/local/sequenza.nf b/modules/local/sequenza.nf index d276810..e6e9530 100644 --- a/modules/local/sequenza.nf +++ b/modules/local/sequenza.nf @@ -1,13 +1,17 @@ -GENOMEREF = file(params.genomes[params.genome].genome) - //SEQUENZA -SEQUENZAGC = file(params.genomes[params.genome].SEQUENZAGC) +GENOMEREF = file(params.genomes[params.genome].genome) +if(params.exome) { + GC = params.genomes[params.genome].SEQUENZAGC_EXOME +}else{ + GC = file(params.genomes[params.genome].SEQUENZAGC) +} SEQUENZA_SCRIPT = params.script_sequenza process seqz_sequenza_bychr { container = "${params.containers.logan}" label 'process_long' + errorStrategy 'ignore' input: tuple val(pairid), val(tumorname), path(tumor), path(tumorbai), @@ -17,9 +21,10 @@ process seqz_sequenza_bychr { tuple val(pairid), path("${tumorname}_${normalname}_${chr}.seqz.gz") script: + """ sequenza-utils bam2seqz \ - -gc ${SEQUENZAGC} \ + -gc $GC \ -F $GENOMEREF \ -C ${chr} \ -n ${normal} \ @@ -33,9 +38,76 @@ process seqz_sequenza_bychr { """ } + +process sequenza { + container = "${params.containers.logan}" + label 'process_medium' + errorStrategy 'ignore' + + input: + tuple val(pairid), path(seqz), val(window) + + output: + tuple val(pairid), + path("${pairid}_alternative_solutions.txt"), + path("${pairid}_alternative_fit.pdf"), + path("${pairid}_model_fit.pdf"), + path("${pairid}_confints_CP.txt"), + path("${pairid}_CN_bars.pdf"), + path("${pairid}_genome_view.pdf"), + path("${pairid}_chromosome_view.pdf"), + path("${pairid}_mutations.txt"), + path("${pairid}_segments.txt"), + path("${pairid}_CP_contours.pdf"), + path("${pairid}_sequenza_cp_table.RData"), + path("${pairid}_chromosome_depths.pdf"), + path("${pairid}_gc_plots.pdf"), + path("${pairid}_sequenza_extract.RData") + + shell: + ''' + zcat !{seqz} | awk '{if (NR==1) {print $0} else {if ($1!="chromosome"){print $0}}}' |\ + sequenza-utils seqz_binning \ + -w !{window} \ + -s - > !{pairid}.bin!{window}.seqz + + Rscript !{SEQUENZA_SCRIPT} \ + !{pairid}.bin!{window}.seqz \ + . \ + !{pairid} \ + !{task.cpus} + ''' + + stub: + """ + touch "${pairid}_alternative_solutions.txt" + touch "${pairid}_alternative_fit.pdf" + touch "${pairid}_model_fit.pdf" + touch "${pairid}_confints_CP.txt" + touch "${pairid}_CN_bars.pdf" + touch "${pairid}_genome_view.pdf" + touch "${pairid}_chromosome_view.pdf" + touch "${pairid}_mutations.txt" + touch "${pairid}_segments.txt" + touch "${pairid}_CP_contours.pdf" + touch "${pairid}_sequenza_cp_table.RData" + touch "${pairid}_chromosome_depths.pdf" + touch "${pairid}_gc_plots.pdf" + touch "${pairid}_sequenza_extract.RData" + + """ + +} + + + + +//**NOTE**: This process is not used in the pipeline, but is kept for reference + process pileup_sequenza { container = "${params.containers.logan}" label 'process_low' + errorStrategy 'ignore' input: tuple val(pairid), val(name), @@ -61,6 +133,7 @@ process pileup_sequenza { process seqz_sequenza_reg { container = "${params.containers.logan}" label 'process_low' + errorStrategy 'ignore' input: tuple val(pairid), val(tumorname), path(tumor), path(tumorbai), @@ -72,7 +145,7 @@ process seqz_sequenza_reg { script: """ sequenza-utils bam2seqz \ - -gc ${SEQUENZAGC} \ + -gc $GC \ -p \ -F $GENOMEREF \ -n ${normal} \ @@ -86,6 +159,7 @@ process seqz_sequenza_reg { """ } + process seqz_sequenza { container = "${params.containers.logan}" label 'process_low' @@ -100,7 +174,7 @@ process seqz_sequenza { script: """ sequenza-utils bam2seqz \ - -gc ${SEQUENZAGC} \ + -gc $GC \ -p \ -F $GENOMEREF \ -n ${normal} \ @@ -113,69 +187,3 @@ process seqz_sequenza { touch "${tumorname}_${normalname}_${chr}.seqz.gz" """ } - - - - -process sequenza { - container = "${params.containers.logan}" - label 'process_medium' - - input: - tuple val(pairid), path(seqz) - - output: - tuple val(pairid), - path("${pairid}_alternative_solutions.txt"), - path("${pairid}_alternative_fit.pdf"), - path("${pairid}_model_fit.pdf"), - path("${pairid}_confints_CP.txt"), - path("${pairid}_CN_bars.pdf"), - path("${pairid}_genome_view.pdf"), - path("${pairid}_chromosome_view.pdf"), - path("${pairid}_mutations.txt"), - path("${pairid}_segments.txt"), - path("${pairid}_CP_contours.pdf"), - path("${pairid}_sequenza_cp_table.RData"), - path("${pairid}_chromosome_depths.pdf"), - path("${pairid}_gc_plots.pdf"), - path("${pairid}_sequenza_extract.RData") - - - shell: - ''' - - zcat !{seqz} | awk '{if (NR==1) {print $0} else {if ($1!="chromosome"){print $0}}}' |\ - sequenza-utils seqz_binning \ - -w 100 \ - -s - > !{pairid}.bin100.seqz - - Rscript !{SEQUENZA_SCRIPT} \ - !{pairid}.bin100.seqz \ - . \ - !{pairid} \ - !{task.cpus} - - ''' - - stub: - - """ - touch "${pairid}_alternative_solutions.txt" - touch "${pairid}_alternative_fit.pdf" - touch "${pairid}_model_fit.pdf" - touch "${pairid}_confints_CP.txt" - touch "${pairid}_CN_bars.pdf" - touch "${pairid}_genome_view.pdf" - touch "${pairid}_chromosome_view.pdf" - touch "${pairid}_mutations.txt" - touch "${pairid}_segments.txt" - touch "${pairid}_CP_contours.pdf" - touch "${pairid}_sequenza_cp_table.RData" - touch "${pairid}_chromosome_depths.pdf" - touch "${pairid}_gc_plots.pdf" - touch "${pairid}_sequenza_extract.RData" - - """ - -} diff --git a/modules/local/trim_align.nf b/modules/local/trim_align.nf index 297dd6e..3c2cb5b 100644 --- a/modules/local/trim_align.nf +++ b/modules/local/trim_align.nf @@ -129,7 +129,7 @@ process applybqsr { Base quality recalibration for all samples to */ container = "${params.containers.logan}" - label 'process_long' + label 'process_highmem' input: tuple val(samplename), path(bam), path(bai), path("${samplename}.recal_data.grp") @@ -140,7 +140,7 @@ process applybqsr { script: """ - gatk --java-options '-Xmx16g' ApplyBQSR \ + gatk --java-options '-Xmx20g' ApplyBQSR \ --reference ${GENOMEREF} \ --input ${bam} \ --bqsr-recal-file ${samplename}.recal_data.grp \ diff --git a/nextflow.config b/nextflow.config index 617315d..2ee89a1 100644 --- a/nextflow.config +++ b/nextflow.config @@ -54,6 +54,7 @@ params { no_tonly=null ffpe=null exome=null + no_trim=null //Set all Inputs to null sample_sheet=null diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf index d7821d6..2290afb 100644 --- a/subworkflows/local/workflows.nf +++ b/subworkflows/local/workflows.nf @@ -197,6 +197,10 @@ workflow ALIGN { return tuple(samplename,fq) } } | flatten() + }else if (params.no_trim){ + fastp_out=fastqinput | map{sample,fqs -> tuple(sample,fqs[0],fqs[1])} + fastqinput | map{sample,fqs -> tuple(sample,fqs[0],fqs[1])}| bwamem2 + alignment_out=bwamem2.out }else{ fastp_out = fastp(fastqinput) | map{sample,f1,f2,json,html -> tuple(sample,f1,f2)} bwamem2(fastp_out) @@ -959,16 +963,21 @@ workflow CNVmouse { main: cnvcall_list = params.cnvcallers.split(',') as List - //Sequenza (Preferred for Paired) + //Sequenza if ("sequenza" in cnvcall_list){ - chrs=Channel.fromList(params.genomes[params.genome].chromosomes) - seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> - tuple("${tname}_${nname}",tname,tumor,tbai,nname,norm,nbai)} - seqzin.combine(chrs) | seqz_sequenza_bychr - seqz_sequenza_bychr.out.groupTuple() - .map{pair, seqz -> tuple(pair, seqz.sort{it.name})} - | sequenza - } + if (params.exome){ + windowsize=Channel.value(50) + }else{ + windowsize=Channel.value(200) + } + chrs=Channel.fromList(params.genomes[params.genome].chromosomes) + seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> + tuple("${tname}_${nname}",tname,tumor,tbai,nname,norm,nbai)} + seqzin.combine(chrs) | seqz_sequenza_bychr + seqz_sequenza_bychr.out.groupTuple() + | combine(windowsize) + | sequenza + } //FREEC Paired Mode if ("freec" in cnvcall_list){ @@ -1029,14 +1038,19 @@ workflow CNVhuman { //Sequenza if ("sequenza" in cnvcall_list){ + if (params.exome){ + windowsize=Channel.value(50) + }else{ + windowsize=Channel.value(200) + } chrs=Channel.fromList(params.genomes[params.genome].chromosomes) seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> tuple("${tname}_${nname}",tname,tumor,tbai,nname,norm,nbai)} seqzin.combine(chrs) | seqz_sequenza_bychr seqz_sequenza_bychr.out.groupTuple() - .map{pair, seqz -> tuple(pair, seqz.sort{it.name})} + | combine(windowsize) | sequenza - } + } //FREEC if ("freec" in cnvcall_list){ @@ -1100,15 +1114,19 @@ workflow CNVhuman_novc { if ("sequenza" in cnvcall_list){ - //Sequenza + if (params.exome){ + windowsize=Channel.value(50) + }else{ + windowsize=Channel.value(200) + } chrs=Channel.fromList(params.genomes[params.genome].chromosomes) seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> tuple("${tname}_${nname}",tname,tumor,tbai,nname,norm,nbai)} seqzin.combine(chrs) | seqz_sequenza_bychr seqz_sequenza_bychr.out.groupTuple() - .map{pair, seqz -> tuple(pair, seqz.sort{it.name})} + | combine(windowsize) | sequenza - } + } if ("freec" in cnvcall_list){ //FREEC diff --git a/subworkflows/local/workflows_tonly.nf b/subworkflows/local/workflows_tonly.nf index 8123f09..1e8069a 100644 --- a/subworkflows/local/workflows_tonly.nf +++ b/subworkflows/local/workflows_tonly.nf @@ -157,6 +157,10 @@ workflow ALIGN_TONLY { return tuple(samplename,fq) } } | flatten() + }else if (params.no_trim){ + fastp_out=fastqinput | map{sample,fqs -> tuple(sample,fqs[0],fqs[1])} + fastqinput | map{sample,fqs -> tuple(sample,fqs[0],fqs[1])}| bwamem2 + alignment_out=bwamem2.out }else{ fastp_out = fastp(fastqinput) | map{sample,f1,f2,json,html -> tuple(sample,f1,f2)} bwamem2(fastp_out) @@ -281,7 +285,7 @@ workflow VC_TONLY { if ("vardict" in call_list){ vardict_in_tonly=vardict_tonly(bambyinterval) | groupTuple() | map{tumor,vcf-> - tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.vardict.vcf/)[0][1].toInteger()},"vardict_tonly","-i 'SBF<0.1 && QUAL >20 && DP >20'")} + tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.vardict.vcf/)[0][1].toInteger()},"vardict_tonly","-i 'SBF<0.1 && QUAL >20 && INFO/DP >20'")} | combineVariants_vardict_tonly | join(sample_sheet) | map{tumor,marked,markedindex,normvcf,normindex ->tuple(tumor,"vardict_tonly",normvcf,normindex)} @@ -490,7 +494,7 @@ workflow SV_TONLY { //Manta if ("manta" in svcall_list){ manta_out=manta_tonly(bamwithsample) - .map{tumor, sv, indel, tumorsv -> + .map{tumor, sv, svtbi, indel, indeltbi, tumorsv, tumorsvtbi-> tuple(tumor,tumorsv,"manta_tonly")} annotsv_manta_tonly(manta_out).ifEmpty("Empty SV input--No SV annotated") svout=svout | concat(manta_out) @@ -501,15 +505,15 @@ workflow SV_TONLY { gridss_out=gridss_tonly(bamwithsample) gridss_out_forsv=gridss_out | map{tumor,vcf,index,bam,gripssvcf,gripsstbi,gripssfilt,filttbi -> - tuple(tumor,gripssfilt,"gridss_tonly")} | gunzip_gridss + tuple(tumor,gripssfilt,"gridss_tonly")} | gunzip_gridss annotsv_gridss_tonly(gridss_out_forsv).ifEmpty("Empty SV input--No SV annotated") - svout=svout | concat(gridss_out) + svout=svout | concat(gridss_out_forsv) } //Survivor if (svcall_list.size()>1){ //Survivor - svout | groupTuple + svout | groupTuple | survivor_sv | annotsv_survivor_tonly | ifEmpty("Empty SV input--No SV annotated") @@ -521,8 +525,8 @@ workflow SV_TONLY { tuple(tumor,vcf,index,gripsstbi,gripssfilt,filttbi)} }else if("manta" in svcall_list){ somaticsv_input=manta_out - | map{tumor,gsv,gsv_tbi,so_sv,so_sv_tbi,unfil_sv,unfil_sv_tbi,unfil_indel,unfil_indel_tbi -> - tuple(tumor,unfil_sv,unfil_sv_tbi,so_sv,so_sv_tbi)} + .map{tumor, sv, svtbi, indel, indeltbi, tumorsv, tumorsvtbi-> + tuple(tumor,sv,svtbi,tumorsv,tumorsvtbi)} }else{ somaticsv_input=Channel.empty() }