From 3e2e1925f3ad56299e5ea2a54ec3e279148087da Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Tue, 8 Apr 2025 11:19:43 -0400 Subject: [PATCH 01/14] fix: typo in process name --- conf/base.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/base.config b/conf/base.config index fb34ad8..902611f 100644 --- a/conf/base.config +++ b/conf/base.config @@ -83,7 +83,7 @@ process { memory = { check_max( 96.GB * task.attempt, 'memory' ) } time = { check_max( 72.h * task.attempt, 'time' ) } } - withName:'qualimap' { + withName:'qualimap_bamqc' { cpus = { check_max( 8 * task.attempt, 'cpus' ) } memory = { check_max( 96.GB * task.attempt, 'memory' ) } time = { check_max( 24.h * task.attempt, 'time' ) } From 2c6ca3f60f111568f15a8bfeb32e21d8c48ac920 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Thu, 10 Apr 2025 09:51:05 -0400 Subject: [PATCH 02/14] fix: fixed sample name for deepvariant --- modules/local/deepvariant.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/local/deepvariant.nf b/modules/local/deepvariant.nf index d584116..4e3aeae 100644 --- a/modules/local/deepvariant.nf +++ b/modules/local/deepvariant.nf @@ -95,6 +95,7 @@ process deepvariant_step3 { """ postprocess_variants \ --ref $GENOMEREF \ + --sample_name ${samplename} \ --infile outdv/${samplename}_call_variants_output.tfrecord.gz \ --outfile ${samplename}_${bed}.vcf.gz \ --gvcf_outfile ${samplename}_${bed}.gvcf.gz \ From 869d05aee2944b17b1acde1966a9caee200191d5 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Thu, 10 Apr 2025 14:30:12 -0400 Subject: [PATCH 03/14] fix: don't exclude any regions from sequenza --- bin/run_sequenza.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bin/run_sequenza.R b/bin/run_sequenza.R index d767c20..6c13dcb 100755 --- a/bin/run_sequenza.R +++ b/bin/run_sequenza.R @@ -48,19 +48,19 @@ CP.example <- sequenza.fit(seqzdata, mc.cores = n_cores) ## Sequenza.extract seems to fail if too few mutations num_mutations <- unlist(lapply(seqzdata$mutations, nrow)) -chrom_list1 <- names(num_mutations)[num_mutations > 3] +chrom_list <- names(num_mutations)[num_mutations > 3] ## Also fails if segments <2 -num_segments <- unlist(lapply(seqzdata$segments, nrow)) -chrom_list2 <- names(num_mutations)[num_segments > 1] +#num_segments <- unlist(lapply(seqzdata$segments, nrow)) +#chrom_list2 <- names(num_mutations)[num_segments > 1] +#chrom_list <- intersect(chrom_list1,chrom_list2) -chrom_list <- intersect(chrom_list1,chrom_list2) not_included <- setdiff(names(num_mutations), chrom_list) print("Printing results...") if (length(not_included) > 0) { print("Excluding these chromosomes because of too few mutations and/or segments...") print(not_included) } -sequenza.results(sequenza.extract = seqzdata,cp.table = CP.example, sample.id = sampleid, out.dir=out_dir, chromosome.list=chrom_list) +sequenza.results(sequenza.extract = seqzdata,cp.table = CP.example, sample.id = sampleid, out.dir=out_dir) date() print("Done") From 30591ba46fc9f22751a6f74f920fc541415711de Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Thu, 10 Apr 2025 14:30:23 -0400 Subject: [PATCH 04/14] fix: set task.cpus --- modules/local/deepvariant.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/local/deepvariant.nf b/modules/local/deepvariant.nf index 4e3aeae..f4d7a5e 100644 --- a/modules/local/deepvariant.nf +++ b/modules/local/deepvariant.nf @@ -152,19 +152,19 @@ process glnexus { """ glnexus_cli --config DeepVariant_unfiltered \ - *.gvcf.gz --threads 8 > germline.v.bcf + *.gvcf.gz --threads $task.cpus > germline.v.bcf bcftools norm \ -m - \ -Oz \ - --threads 8 \ + --threads $task.cpus \ -f $GENOMEREF \ -o germline.norm.vcf.gz \ germline.v.bcf bcftools index \ -f -t \ - --threads 8 \ + --threads $task.cpus \ germline.norm.vcf.gz """ @@ -199,7 +199,7 @@ process deepvariant_combined { --reads=${bam} \ --output_gvcf=${samplename}.gvcf.gz \ --output_vcf=${samplename}.vcf.gz \ - --num_shards=16 + --num_shards=$task.cpus """ From 1ee7a3a76d639c24e1f3342ee4a9bf37c43c3053 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Tue, 15 Apr 2025 12:30:25 -0400 Subject: [PATCH 05/14] fix: increase mem for apply --- modules/local/trim_align.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/trim_align.nf b/modules/local/trim_align.nf index 297dd6e..3c2cb5b 100644 --- a/modules/local/trim_align.nf +++ b/modules/local/trim_align.nf @@ -129,7 +129,7 @@ process applybqsr { Base quality recalibration for all samples to */ container = "${params.containers.logan}" - label 'process_long' + label 'process_highmem' input: tuple val(samplename), path(bam), path(bai), path("${samplename}.recal_data.grp") @@ -140,7 +140,7 @@ process applybqsr { script: """ - gatk --java-options '-Xmx16g' ApplyBQSR \ + gatk --java-options '-Xmx20g' ApplyBQSR \ --reference ${GENOMEREF} \ --input ${bam} \ --bqsr-recal-file ${samplename}.recal_data.grp \ From e0db83ba9e9db87414cd1aa66c3da065234d7526 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Tue, 15 Apr 2025 16:33:39 -0400 Subject: [PATCH 06/14] fix: simplify sequenza and remove chrm --- conf/genomes.config | 10 +++++----- subworkflows/local/workflows.nf | 13 +++++-------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/conf/genomes.config b/conf/genomes.config index 8a807fd..ad5f979 100644 --- a/conf/genomes.config +++ b/conf/genomes.config @@ -29,7 +29,7 @@ params { octopus_sforest= "--somatic-forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/somatic.v0.7.4.forest" octopus_gforest= "--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest" SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz" - chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY','chrM'] + chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY'] //HMFTOOLS GENOMEVER = "38" HMFGENOME = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/bwamem2/GRCh38.d1.vd1.fa" @@ -86,7 +86,7 @@ params { octopus_sforest= "" //NO hg19 somaticforest"--somatic-forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/somatic.v0.7.4.forest" octopus_gforest= "" //no hg19 gforest"--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest" SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz" - chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY','chrM'] + chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY'] //HMFTOOLS GENOMEVER = "37" HMFGENOME = "/data/CCBR_Pipeliner/db/PipeDB/lib/hs37d5.fa" @@ -149,7 +149,7 @@ params { FREECPILEUP = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/FREEC/mm10_dbSNP137.ucsc.freec.bed" FREECSNPS= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/FREEC/mm10_dbSNP137.ucsc.freec.txt" } - chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chrX','chrY','chrM'] + chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chrX','chrY'] //CNVKIT REFFLAT = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/mm10/cnvkit/refFlat.txt" ACCESS = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/mm10/cnvkit/access-10kb.mm10.bed" @@ -184,7 +184,7 @@ params { octopus_sforest= "--somatic-forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/somatic.v0.7.4.forest" octopus_gforest= "--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest" SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz" - chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY','chrM'] + chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY'] //HMFTOOLS GENOMEVER = "38" HMFGENOME = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/bwamem2/GRCh38.d1.vd1.fa" @@ -238,7 +238,7 @@ params { octopus_sforest= "--somatic-forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/somatic.v0.7.4.forest" octopus_gforest= "--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest" SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz" - chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY','chrM'] + chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY'] //HMFTOOLS GENOMEVER = "38" HMFGENOME = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/genome_noalt/bwamem2/GCA_000001405.15_GRCh38_no_alt_analysis_set.fasta" diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf index d7821d6..3c6676e 100644 --- a/subworkflows/local/workflows.nf +++ b/subworkflows/local/workflows.nf @@ -965,11 +965,10 @@ workflow CNVmouse { seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> tuple("${tname}_${nname}",tname,tumor,tbai,nname,norm,nbai)} seqzin.combine(chrs) | seqz_sequenza_bychr - seqz_sequenza_bychr.out.groupTuple() - .map{pair, seqz -> tuple(pair, seqz.sort{it.name})} + seqz_sequenza_bychr.out.groupTuple() | sequenza - } + } //FREEC Paired Mode if ("freec" in cnvcall_list){ if(params.exome){ @@ -1033,10 +1032,9 @@ workflow CNVhuman { seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> tuple("${tname}_${nname}",tname,tumor,tbai,nname,norm,nbai)} seqzin.combine(chrs) | seqz_sequenza_bychr - seqz_sequenza_bychr.out.groupTuple() - .map{pair, seqz -> tuple(pair, seqz.sort{it.name})} + seqz_sequenza_bychr.out.groupTuple() | sequenza - } + } //FREEC if ("freec" in cnvcall_list){ @@ -1105,8 +1103,7 @@ workflow CNVhuman_novc { seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> tuple("${tname}_${nname}",tname,tumor,tbai,nname,norm,nbai)} seqzin.combine(chrs) | seqz_sequenza_bychr - seqz_sequenza_bychr.out.groupTuple() - .map{pair, seqz -> tuple(pair, seqz.sort{it.name})} + seqz_sequenza_bychr.out.groupTuple() | sequenza } From 97446b7cebc3623513e9b671b8c55e22a20086a5 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Wed, 16 Apr 2025 22:44:17 -0400 Subject: [PATCH 07/14] fix: sequenza references --- conf/genomes.config | 15 ++-- modules/local/bwamem/bwamem2.nf | 1 - modules/local/sequenza.nf | 151 +++++++++++++++++--------------- 3 files changed, 89 insertions(+), 78 deletions(-) diff --git a/conf/genomes.config b/conf/genomes.config index ad5f979..2cf3496 100644 --- a/conf/genomes.config +++ b/conf/genomes.config @@ -28,7 +28,8 @@ params { annotsvgenome = "GRCh38" octopus_sforest= "--somatic-forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/somatic.v0.7.4.forest" octopus_gforest= "--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest" - SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz" + SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38.gc200Base.wig.gz" + SEQUENZAGC_EXOME = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38.gc50Base.wig.gz" chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY'] //HMFTOOLS GENOMEVER = "38" @@ -85,7 +86,8 @@ params { annotsvgenome = "GRCh37" octopus_sforest= "" //NO hg19 somaticforest"--somatic-forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/somatic.v0.7.4.forest" octopus_gforest= "" //no hg19 gforest"--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest" - SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz" + SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/SEQUENZA/hg19.gc200Base.wig.gz" + SEQUENZAGC_EXOME = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/SEQUENZA/hg19.gc50Base.wig.gz" chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY'] //HMFTOOLS GENOMEVER = "37" @@ -142,7 +144,8 @@ params { annotsvgenome = "mm10" octopus_sforest = "" octopus_gforest = "" - SEQUENZAGC = '/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/SEQUENZA/mm10.gc50Base.wig.gz' + SEQUENZAGC = '/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/SEQUENZA/mm10.gc200Base.wig.gz' + SEQUENZAGC_EXOME = '/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/SEQUENZA/mm10.gc50Base.wig.gz' FREEC { FREECLENGTHS = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/FREEC/mm10.fa.fai" FREECCHROMS = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/FREEC/Chromosomes" @@ -183,7 +186,8 @@ params { annotsvgenome = "GRCh38" octopus_sforest= "--somatic-forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/somatic.v0.7.4.forest" octopus_gforest= "--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest" - SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz" + SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38.gc200Base.wig.gz" + SEQUENZAGC_EXOME = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38.gc50Base.wig.gz" chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY'] //HMFTOOLS GENOMEVER = "38" @@ -237,7 +241,8 @@ params { annotsvgenome = "GRCh38" octopus_sforest= "--somatic-forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/somatic.v0.7.4.forest" octopus_gforest= "--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest" - SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz" + SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38.gc200Base.wig.gz" + SEQUENZAGC_EXOME = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38.gc50Base.wig.gz" chromosomes = ['chr1','chr2','chr3','chr4','chr5','chr6','chr7','chr8','chr9','chr10','chr11','chr12','chr13','chr14','chr15','chr16','chr17','chr18','chr19','chr20','chr21','chr22','chrX','chrY'] //HMFTOOLS GENOMEVER = "38" diff --git a/modules/local/bwamem/bwamem2.nf b/modules/local/bwamem/bwamem2.nf index 8f26939..e9e94c8 100644 --- a/modules/local/bwamem/bwamem2.nf +++ b/modules/local/bwamem/bwamem2.nf @@ -11,7 +11,6 @@ process bwamem2 { else if (task.attempt == 3) return '200 GB' } - input: tuple val(samplename), path("${samplename}.R1.trimmed.fastq.gz"), diff --git a/modules/local/sequenza.nf b/modules/local/sequenza.nf index d276810..31cb71b 100644 --- a/modules/local/sequenza.nf +++ b/modules/local/sequenza.nf @@ -1,7 +1,10 @@ -GENOMEREF = file(params.genomes[params.genome].genome) - //SEQUENZA -SEQUENZAGC = file(params.genomes[params.genome].SEQUENZAGC) +GENOMEREF = file(params.genomes[params.genome].genome) +if(params.exome) { + GC = params.genomes[params.genome].SEQUENZAGC_EXOME +}else{ + GC = file(params.genomes[params.genome].SEQUENZAGC) +} SEQUENZA_SCRIPT = params.script_sequenza @@ -17,9 +20,10 @@ process seqz_sequenza_bychr { tuple val(pairid), path("${tumorname}_${normalname}_${chr}.seqz.gz") script: + """ sequenza-utils bam2seqz \ - -gc ${SEQUENZAGC} \ + -gc $GC \ -F $GENOMEREF \ -C ${chr} \ -n ${normal} \ @@ -33,6 +37,74 @@ process seqz_sequenza_bychr { """ } + +process sequenza { + container = "${params.containers.logan}" + label 'process_medium' + + input: + tuple val(pairid), path(seqz), val(window) + + output: + tuple val(pairid), + path("${pairid}_alternative_solutions.txt"), + path("${pairid}_alternative_fit.pdf"), + path("${pairid}_model_fit.pdf"), + path("${pairid}_confints_CP.txt"), + path("${pairid}_CN_bars.pdf"), + path("${pairid}_genome_view.pdf"), + path("${pairid}_chromosome_view.pdf"), + path("${pairid}_mutations.txt"), + path("${pairid}_segments.txt"), + path("${pairid}_CP_contours.pdf"), + path("${pairid}_sequenza_cp_table.RData"), + path("${pairid}_chromosome_depths.pdf"), + path("${pairid}_gc_plots.pdf"), + path("${pairid}_sequenza_extract.RData") + + + shell: + ''' + + zcat !{seqz} | awk '{if (NR==1) {print $0} else {if ($1!="chromosome"){print $0}}}' |\ + sequenza-utils seqz_binning \ + -w !{window} \ + -s - > !{pairid}.bin!{window}.seqz + + Rscript !{SEQUENZA_SCRIPT} \ + !{pairid}.bin!{window}.seqz \ + . \ + !{pairid} \ + !{task.cpus} + + ''' + + stub: + + """ + touch "${pairid}_alternative_solutions.txt" + touch "${pairid}_alternative_fit.pdf" + touch "${pairid}_model_fit.pdf" + touch "${pairid}_confints_CP.txt" + touch "${pairid}_CN_bars.pdf" + touch "${pairid}_genome_view.pdf" + touch "${pairid}_chromosome_view.pdf" + touch "${pairid}_mutations.txt" + touch "${pairid}_segments.txt" + touch "${pairid}_CP_contours.pdf" + touch "${pairid}_sequenza_cp_table.RData" + touch "${pairid}_chromosome_depths.pdf" + touch "${pairid}_gc_plots.pdf" + touch "${pairid}_sequenza_extract.RData" + + """ + +} + + + + + process pileup_sequenza { container = "${params.containers.logan}" label 'process_low' @@ -72,7 +144,7 @@ process seqz_sequenza_reg { script: """ sequenza-utils bam2seqz \ - -gc ${SEQUENZAGC} \ + -gc $GC \ -p \ -F $GENOMEREF \ -n ${normal} \ @@ -86,6 +158,7 @@ process seqz_sequenza_reg { """ } + process seqz_sequenza { container = "${params.containers.logan}" label 'process_low' @@ -100,7 +173,7 @@ process seqz_sequenza { script: """ sequenza-utils bam2seqz \ - -gc ${SEQUENZAGC} \ + -gc $GC \ -p \ -F $GENOMEREF \ -n ${normal} \ @@ -113,69 +186,3 @@ process seqz_sequenza { touch "${tumorname}_${normalname}_${chr}.seqz.gz" """ } - - - - -process sequenza { - container = "${params.containers.logan}" - label 'process_medium' - - input: - tuple val(pairid), path(seqz) - - output: - tuple val(pairid), - path("${pairid}_alternative_solutions.txt"), - path("${pairid}_alternative_fit.pdf"), - path("${pairid}_model_fit.pdf"), - path("${pairid}_confints_CP.txt"), - path("${pairid}_CN_bars.pdf"), - path("${pairid}_genome_view.pdf"), - path("${pairid}_chromosome_view.pdf"), - path("${pairid}_mutations.txt"), - path("${pairid}_segments.txt"), - path("${pairid}_CP_contours.pdf"), - path("${pairid}_sequenza_cp_table.RData"), - path("${pairid}_chromosome_depths.pdf"), - path("${pairid}_gc_plots.pdf"), - path("${pairid}_sequenza_extract.RData") - - - shell: - ''' - - zcat !{seqz} | awk '{if (NR==1) {print $0} else {if ($1!="chromosome"){print $0}}}' |\ - sequenza-utils seqz_binning \ - -w 100 \ - -s - > !{pairid}.bin100.seqz - - Rscript !{SEQUENZA_SCRIPT} \ - !{pairid}.bin100.seqz \ - . \ - !{pairid} \ - !{task.cpus} - - ''' - - stub: - - """ - touch "${pairid}_alternative_solutions.txt" - touch "${pairid}_alternative_fit.pdf" - touch "${pairid}_model_fit.pdf" - touch "${pairid}_confints_CP.txt" - touch "${pairid}_CN_bars.pdf" - touch "${pairid}_genome_view.pdf" - touch "${pairid}_chromosome_view.pdf" - touch "${pairid}_mutations.txt" - touch "${pairid}_segments.txt" - touch "${pairid}_CP_contours.pdf" - touch "${pairid}_sequenza_cp_table.RData" - touch "${pairid}_chromosome_depths.pdf" - touch "${pairid}_gc_plots.pdf" - touch "${pairid}_sequenza_extract.RData" - - """ - -} From 1eed795c8525db759ab8f6fa61db6d9112f4d848 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Wed, 16 Apr 2025 22:44:35 -0400 Subject: [PATCH 08/14] feat: allow option for no trimming --- nextflow.config | 3 +- subworkflows/local/workflows.nf | 45 ++++++++++++++++++++------- subworkflows/local/workflows_tonly.nf | 4 +++ 3 files changed, 39 insertions(+), 13 deletions(-) diff --git a/nextflow.config b/nextflow.config index d740955..56bcdec 100644 --- a/nextflow.config +++ b/nextflow.config @@ -54,7 +54,8 @@ params { no_tonly=null ffpe=null exome=null - + no_trim=null + //Set all Inputs to null sample_sheet=null fastq_file_input=null diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf index 3c6676e..2290afb 100644 --- a/subworkflows/local/workflows.nf +++ b/subworkflows/local/workflows.nf @@ -197,6 +197,10 @@ workflow ALIGN { return tuple(samplename,fq) } } | flatten() + }else if (params.no_trim){ + fastp_out=fastqinput | map{sample,fqs -> tuple(sample,fqs[0],fqs[1])} + fastqinput | map{sample,fqs -> tuple(sample,fqs[0],fqs[1])}| bwamem2 + alignment_out=bwamem2.out }else{ fastp_out = fastp(fastqinput) | map{sample,f1,f2,json,html -> tuple(sample,f1,f2)} bwamem2(fastp_out) @@ -959,16 +963,22 @@ workflow CNVmouse { main: cnvcall_list = params.cnvcallers.split(',') as List - //Sequenza (Preferred for Paired) + //Sequenza if ("sequenza" in cnvcall_list){ - chrs=Channel.fromList(params.genomes[params.genome].chromosomes) - seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> - tuple("${tname}_${nname}",tname,tumor,tbai,nname,norm,nbai)} - seqzin.combine(chrs) | seqz_sequenza_bychr - seqz_sequenza_bychr.out.groupTuple() - | sequenza + if (params.exome){ + windowsize=Channel.value(50) + }else{ + windowsize=Channel.value(200) + } + chrs=Channel.fromList(params.genomes[params.genome].chromosomes) + seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> + tuple("${tname}_${nname}",tname,tumor,tbai,nname,norm,nbai)} + seqzin.combine(chrs) | seqz_sequenza_bychr + seqz_sequenza_bychr.out.groupTuple() + | combine(windowsize) + | sequenza + } - } //FREEC Paired Mode if ("freec" in cnvcall_list){ if(params.exome){ @@ -1028,11 +1038,17 @@ workflow CNVhuman { //Sequenza if ("sequenza" in cnvcall_list){ + if (params.exome){ + windowsize=Channel.value(50) + }else{ + windowsize=Channel.value(200) + } chrs=Channel.fromList(params.genomes[params.genome].chromosomes) seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> tuple("${tname}_${nname}",tname,tumor,tbai,nname,norm,nbai)} seqzin.combine(chrs) | seqz_sequenza_bychr - seqz_sequenza_bychr.out.groupTuple() + seqz_sequenza_bychr.out.groupTuple() + | combine(windowsize) | sequenza } @@ -1098,14 +1114,19 @@ workflow CNVhuman_novc { if ("sequenza" in cnvcall_list){ - //Sequenza + if (params.exome){ + windowsize=Channel.value(50) + }else{ + windowsize=Channel.value(200) + } chrs=Channel.fromList(params.genomes[params.genome].chromosomes) seqzin=bamwithsample.map{tname,tumor,tbai,nname,norm,nbai-> tuple("${tname}_${nname}",tname,tumor,tbai,nname,norm,nbai)} seqzin.combine(chrs) | seqz_sequenza_bychr - seqz_sequenza_bychr.out.groupTuple() + seqz_sequenza_bychr.out.groupTuple() + | combine(windowsize) | sequenza - } + } if ("freec" in cnvcall_list){ //FREEC diff --git a/subworkflows/local/workflows_tonly.nf b/subworkflows/local/workflows_tonly.nf index 8123f09..0693cdb 100644 --- a/subworkflows/local/workflows_tonly.nf +++ b/subworkflows/local/workflows_tonly.nf @@ -157,6 +157,10 @@ workflow ALIGN_TONLY { return tuple(samplename,fq) } } | flatten() + }else if (params.no_trim){ + fastp_out=fastqinput | map{sample,fqs -> tuple(sample,fqs[0],fqs[1])} + fastqinput | map{sample,fqs -> tuple(sample,fqs[0],fqs[1])}| bwamem2 + alignment_out=bwamem2.out }else{ fastp_out = fastp(fastqinput) | map{sample,f1,f2,json,html -> tuple(sample,f1,f2)} bwamem2(fastp_out) From e064120c8d51aa6c5723be315521b9387237e4b5 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Thu, 17 Apr 2025 16:46:09 -0400 Subject: [PATCH 09/14] fix: increase lscratch for apply bqsr --- conf/biowulf.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/biowulf.config b/conf/biowulf.config index 371a41c..3a1f31a 100644 --- a/conf/biowulf.config +++ b/conf/biowulf.config @@ -61,7 +61,7 @@ process { cpus = { check_max( 4 * task.attempt, 'cpus' ) } memory = { check_max( 24.GB * task.attempt, 'memory' ) } time = { check_max( 120.h * task.attempt, 'time' ) } - clusterOptions = ' --gres=lscratch:300 ' + clusterOptions = ' --gres=lscratch:500 ' } } From 43a53b260ce116fe6ff774880be597758e9e29a7 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Thu, 17 Apr 2025 16:46:38 -0400 Subject: [PATCH 10/14] fix: correct order of manta output --- modules/local/gridss.nf | 9 +++------ subworkflows/local/workflows_tonly.nf | 12 ++++++------ 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/modules/local/gridss.nf b/modules/local/gridss.nf index ab91c03..44a24b0 100644 --- a/modules/local/gridss.nf +++ b/modules/local/gridss.nf @@ -82,13 +82,10 @@ process gridss_tonly { output: tuple val(tumorname), - path("${tumorname}.vcf.gz"), - path("${tumorname}.vcf.gz.tbi"), + path("${tumorname}.vcf.gz"), path("${tumorname}.vcf.gz.tbi"), path("${tumorname}.vcf.gz.assembly.bam"), - path("${tumorname}.gripss.vcf.gz"), - path("${tumorname}.gripss.vcf.gz.tbi"), - path("${tumorname}.gripss.filtered.vcf.gz"), - path("${tumorname}.gripss.filtered.vcf.gz.tbi") + path("${tumorname}.gripss.vcf.gz"), path("${tumorname}.gripss.vcf.gz.tbi"), + path("${tumorname}.gripss.filtered.vcf.gz"), path("${tumorname}.gripss.filtered.vcf.gz.tbi") script: """ diff --git a/subworkflows/local/workflows_tonly.nf b/subworkflows/local/workflows_tonly.nf index 0693cdb..50e88c1 100644 --- a/subworkflows/local/workflows_tonly.nf +++ b/subworkflows/local/workflows_tonly.nf @@ -494,7 +494,7 @@ workflow SV_TONLY { //Manta if ("manta" in svcall_list){ manta_out=manta_tonly(bamwithsample) - .map{tumor, sv, indel, tumorsv -> + .map{tumor, sv, svtbi, indel, indeltbi, tumorsv, tumorsvtbi-> tuple(tumor,tumorsv,"manta_tonly")} annotsv_manta_tonly(manta_out).ifEmpty("Empty SV input--No SV annotated") svout=svout | concat(manta_out) @@ -505,15 +505,15 @@ workflow SV_TONLY { gridss_out=gridss_tonly(bamwithsample) gridss_out_forsv=gridss_out | map{tumor,vcf,index,bam,gripssvcf,gripsstbi,gripssfilt,filttbi -> - tuple(tumor,gripssfilt,"gridss_tonly")} | gunzip_gridss + tuple(tumor,gripssfilt,"gridss_tonly")} | gunzip_gridss annotsv_gridss_tonly(gridss_out_forsv).ifEmpty("Empty SV input--No SV annotated") - svout=svout | concat(gridss_out) + svout=svout | concat(gridss_out_forsv) } //Survivor if (svcall_list.size()>1){ //Survivor - svout | groupTuple + svout | groupTuple | survivor_sv | annotsv_survivor_tonly | ifEmpty("Empty SV input--No SV annotated") @@ -525,8 +525,8 @@ workflow SV_TONLY { tuple(tumor,vcf,index,gripsstbi,gripssfilt,filttbi)} }else if("manta" in svcall_list){ somaticsv_input=manta_out - | map{tumor,gsv,gsv_tbi,so_sv,so_sv_tbi,unfil_sv,unfil_sv_tbi,unfil_indel,unfil_indel_tbi -> - tuple(tumor,unfil_sv,unfil_sv_tbi,so_sv,so_sv_tbi)} + .map{tumor, sv, svtbi, indel, indeltbi, tumorsv, tumorsvtbi-> + tuple(tumor,sv,svtbi,tumorsv,tumorsvtbi)} }else{ somaticsv_input=Channel.empty() } From 391fa7ba0e09880d4bde91f1d1e1d2f0a2fd3f4f Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Fri, 18 Apr 2025 10:53:49 -0400 Subject: [PATCH 11/14] feat: allow processes to fail with an ignore --- modules/local/purple.nf | 13 ++++++++++--- modules/local/sequenza.nf | 9 +++++---- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/modules/local/purple.nf b/modules/local/purple.nf index 2a58dda..6bb864d 100644 --- a/modules/local/purple.nf +++ b/modules/local/purple.nf @@ -16,8 +16,9 @@ GERMLINEHOTSPOTS = file(params.genomes[params.genome].GERMLINEHOTSPOTS) process amber_tonly { container = "${params.containers.logan}" - label 'process_medium' + errorStrategy 'ignore' + input: tuple val(tumorname), path(tumor), path(tumorbai) @@ -49,8 +50,8 @@ process amber_tonly { process amber_tn { container = "${params.containers.logan}" - label 'process_medium' + errorStrategy 'ignore' input: tuple val(tumorname), path(tumor), path(tumorbai), @@ -85,6 +86,7 @@ process amber_tn { process cobalt_tonly { container = "${params.containers.logan}" label 'process_medium' + errorStrategy 'ignore' input: tuple val(tumorname), path(tumor), path(tumorbai) @@ -116,6 +118,7 @@ process cobalt_tonly { process cobalt_tn { container = "${params.containers.logan}" label 'process_medium' + errorStrategy 'ignore' input: tuple val(tumorname), path(tumor), path(tumorbai), @@ -149,6 +152,7 @@ process cobalt_tn { process purple { container = "${params.containers.logan}" label 'process_medium' + errorStrategy 'ignore' input: tuple val(id), val(tumorname), val(normalname), @@ -190,6 +194,7 @@ process purple { process purple_novc { container = "${params.containers.logan}" label 'process_medium' + errorStrategy 'ignore' input: tuple val(id), val(tumorname), val(normalname), @@ -229,6 +234,7 @@ process purple_novc { process purple_tonly { container = "${params.containers.logan}" label 'process_medium' + errorStrategy 'ignore' input: tuple val(tumorname), @@ -269,7 +275,8 @@ process purple_tonly { process purple_tonly_novc { container = "${params.containers.logan}" label 'process_medium' - + errorStrategy 'ignore' + input: tuple val(tumorname), val(normalname), path(cobaltin), path(amberin) diff --git a/modules/local/sequenza.nf b/modules/local/sequenza.nf index 31cb71b..e6e9530 100644 --- a/modules/local/sequenza.nf +++ b/modules/local/sequenza.nf @@ -11,6 +11,7 @@ SEQUENZA_SCRIPT = params.script_sequenza process seqz_sequenza_bychr { container = "${params.containers.logan}" label 'process_long' + errorStrategy 'ignore' input: tuple val(pairid), val(tumorname), path(tumor), path(tumorbai), @@ -41,6 +42,7 @@ process seqz_sequenza_bychr { process sequenza { container = "${params.containers.logan}" label 'process_medium' + errorStrategy 'ignore' input: tuple val(pairid), path(seqz), val(window) @@ -62,10 +64,8 @@ process sequenza { path("${pairid}_gc_plots.pdf"), path("${pairid}_sequenza_extract.RData") - shell: ''' - zcat !{seqz} | awk '{if (NR==1) {print $0} else {if ($1!="chromosome"){print $0}}}' |\ sequenza-utils seqz_binning \ -w !{window} \ @@ -76,11 +76,9 @@ process sequenza { . \ !{pairid} \ !{task.cpus} - ''' stub: - """ touch "${pairid}_alternative_solutions.txt" touch "${pairid}_alternative_fit.pdf" @@ -104,10 +102,12 @@ process sequenza { +//**NOTE**: This process is not used in the pipeline, but is kept for reference process pileup_sequenza { container = "${params.containers.logan}" label 'process_low' + errorStrategy 'ignore' input: tuple val(pairid), val(name), @@ -133,6 +133,7 @@ process pileup_sequenza { process seqz_sequenza_reg { container = "${params.containers.logan}" label 'process_low' + errorStrategy 'ignore' input: tuple val(pairid), val(tumorname), path(tumor), path(tumorbai), From 29a27c240d93d02c716cc7710bbab9e1dede295e Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Sun, 27 Apr 2025 23:09:11 -0400 Subject: [PATCH 12/14] fix: correct vardict filtering --- subworkflows/local/workflows_tonly.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/workflows_tonly.nf b/subworkflows/local/workflows_tonly.nf index 50e88c1..1e8069a 100644 --- a/subworkflows/local/workflows_tonly.nf +++ b/subworkflows/local/workflows_tonly.nf @@ -285,7 +285,7 @@ workflow VC_TONLY { if ("vardict" in call_list){ vardict_in_tonly=vardict_tonly(bambyinterval) | groupTuple() | map{tumor,vcf-> - tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.vardict.vcf/)[0][1].toInteger()},"vardict_tonly","-i 'SBF<0.1 && QUAL >20 && DP >20'")} + tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.vardict.vcf/)[0][1].toInteger()},"vardict_tonly","-i 'SBF<0.1 && QUAL >20 && INFO/DP >20'")} | combineVariants_vardict_tonly | join(sample_sheet) | map{tumor,marked,markedindex,normvcf,normindex ->tuple(tumor,"vardict_tonly",normvcf,normindex)} From 85066778c052ab23bab00f2ec7bbfa596fe8a5ad Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Sun, 27 Apr 2025 23:09:23 -0400 Subject: [PATCH 13/14] fix: hg19 PON error --- conf/genomes.config | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/conf/genomes.config b/conf/genomes.config index 2cf3496..94e2ef6 100644 --- a/conf/genomes.config +++ b/conf/genomes.config @@ -73,7 +73,8 @@ params { dbsnp = "/fdb/GATK_resource_bundle/hg19-2.8/dbsnp_138.hg19.vcf.gz" germline_resource = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/GATKbundle/af-only-gnomad.raw.sites.liftover.hg19.vcf.gz" gnomad = '--germline-resource /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/GATKbundle/af-only-gnomad.raw.sites.liftover.hg19.vcf.gz' - pon = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/GATKbundle/hg19.liftGRCh37.noCOSMIC_ClinVar.pon.vcf.gz" + PON = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/GATKbundle/hg19.liftGRCh37.noCOSMIC_ClinVar.pon.vcf.gz" + tonly_PON = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg19/GATKbundle/hg19.liftGRCh37.noCOSMIC_ClinVar.pon.vcf.gz" KRAKENBACDB = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/kraken/20180907_standard_kraken2" snpeff_genome = "GRCh37.75" snpeff_config = "/usr/local/apps/snpEff/4.3t/snpEff.config" From dafa768906537fecc9681f0f17b89b03c795e779 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Thu, 1 May 2025 21:24:22 -0400 Subject: [PATCH 14/14] fix: correct hotspot path --- modules/local/purple.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/purple.nf b/modules/local/purple.nf index 6bb864d..c89dc39 100644 --- a/modules/local/purple.nf +++ b/modules/local/purple.nf @@ -257,7 +257,7 @@ process purple_tonly { $ENSEMBLCACHE \ -somatic_vcf ${somaticvcf} \ -driver_gene_panel $DRIVERS \ - -somatic_hotspots $HOTSPOTS \ + -somatic_hotspots $SOMATICHOTSPOTS \ -threads $task.cpus \ -output_dir ${tumorname} """