From 5abc522612c2f51441b51f517b9f6c25616156e6 Mon Sep 17 00:00:00 2001 From: Christopher Kachulis Date: Mon, 13 May 2019 16:52:57 -0400 Subject: [PATCH 01/15] Adding FastQC to optimus and SS2 --- library/tasks/FastQC.wdl | 50 +++++++++++++++++++ pipelines/optimus/Optimus.wdl | 11 ++++ .../SmartSeq2SingleSample.wdl | 6 +++ 3 files changed, 67 insertions(+) create mode 100644 library/tasks/FastQC.wdl diff --git a/library/tasks/FastQC.wdl b/library/tasks/FastQC.wdl new file mode 100644 index 000000000..15d636751 --- /dev/null +++ b/library/tasks/FastQC.wdl @@ -0,0 +1,50 @@ +version 1.0 + +workflow testFastQC { + input { + Array[File] fastqs + } + + call FastQC { + input: + fastq_files = fastqs + } + +} + +task FastQC { + input { + Array[File] fastq_files + String docker = "quay.io/biocontainers/fastqc:0.11.8--1" + Int machine_mem_mb = 3850 + Int disk = ceil(size(fastq_files, "Gi") * 2.2) + Int preemptible = 3 + } + + parameter_meta { + fastq_files : "input fastq files" + docker : "(optional) the docker image containing the runtime environment for this task" + disk: "(optional) the amount of disk space (GiB) to provision for this task" + preemptible: "(optional) if non-zero, request a pre-emptible instance and allow for this number of preemptions before running the task on a non preemptible machine" + machine_mem_mb: "(optional) the amount of memory (MiB) to provision for this task" + } + + command <<< + set -e + + mkdir outputs + fastqc ~{sep=' ' fastq_files} -o outputs + >>> + + runtime { + docker: docker + memory: "${machine_mem_mb} MiB" + disks: "local-disk ${disk} HDD" + preemptible: preemptible + } + + output { + Array[File] fastqc_htmls = glob("outputs/*.html") + Array[File] fastqc_zips = glob("outputs/*.zip") + } +} \ No newline at end of file diff --git a/pipelines/optimus/Optimus.wdl b/pipelines/optimus/Optimus.wdl index d40f73fb8..87d69beba 100644 --- a/pipelines/optimus/Optimus.wdl +++ b/pipelines/optimus/Optimus.wdl @@ -11,6 +11,7 @@ import "RunEmptyDrops.wdl" as RunEmptyDrops import "ZarrUtils.wdl" as ZarrUtils import "Picard.wdl" as Picard import "UmiCorrection.wdl" as UmiCorrection +import "FastQC.wdl" as FastQC workflow Optimus { meta { @@ -77,6 +78,11 @@ workflow Optimus { r2_unmapped_bam = FastqToUBam.bam_output, whitelist = whitelist } + + call FastQC.FastQC as FastQC { + input: + fastq_files = [r1_fastq[index], r2_fastq[index], non_optional_i1_fastq[index]] + } } # if the index is not passed, proceed without it. @@ -87,6 +93,11 @@ workflow Optimus { r2_unmapped_bam = FastqToUBam.bam_output, whitelist = whitelist } + + call FastQC.FastQC as FastQC { + input: + fastq_files = [r1_fastq[index], r2_fastq[index]] + } } File barcoded_bam = select_first([AttachBarcodes.bam_output, AttachBarcodesNoIndex.bam_output]) diff --git a/pipelines/smartseq2_single_sample/SmartSeq2SingleSample.wdl b/pipelines/smartseq2_single_sample/SmartSeq2SingleSample.wdl index 8e92aa4b4..4c8cd4c6b 100644 --- a/pipelines/smartseq2_single_sample/SmartSeq2SingleSample.wdl +++ b/pipelines/smartseq2_single_sample/SmartSeq2SingleSample.wdl @@ -3,6 +3,7 @@ import "Picard.wdl" as Picard import "RSEM.wdl" as RSEM import "GroupMetricsOutputs.wdl" as GroupQCs import "ZarrUtils.wdl" as ZarrUtils +import "FastQC.wdl" as FastQC workflow SmartSeq2SingleCell { meta { @@ -54,6 +55,11 @@ workflow SmartSeq2SingleCell { String quality_control_output_basename = output_name + "_qc" + call FastQC.FastQC { + input: + fastq_files = [fastq1, fastq2] + } + call HISAT2.HISAT2PairedEnd { input: hisat2_ref = hisat2_ref_index, From 6646b3cbd8ad636799c5627572e85993fce2f546 Mon Sep 17 00:00:00 2001 From: Christopher Kachulis Date: Mon, 13 May 2019 16:59:39 -0400 Subject: [PATCH 02/15] remove test workflow --- library/tasks/FastQC.wdl | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/library/tasks/FastQC.wdl b/library/tasks/FastQC.wdl index 15d636751..d712e8884 100644 --- a/library/tasks/FastQC.wdl +++ b/library/tasks/FastQC.wdl @@ -1,17 +1,5 @@ version 1.0 -workflow testFastQC { - input { - Array[File] fastqs - } - - call FastQC { - input: - fastq_files = fastqs - } - -} - task FastQC { input { Array[File] fastq_files From 497f2a0263ed5acffe68da7fac8166ab8d659123 Mon Sep 17 00:00:00 2001 From: Christopher Kachulis Date: Tue, 14 May 2019 12:44:34 -0400 Subject: [PATCH 03/15] run fastqc on only subset of reads in fastqs --- library/tasks/FastQC.wdl | 21 ++++++++++++++++--- pipelines/optimus/Optimus.wdl | 10 +++++++-- .../SmartSeq2SingleSample.wdl | 6 +++++- 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/library/tasks/FastQC.wdl b/library/tasks/FastQC.wdl index d712e8884..e8bdcd81d 100644 --- a/library/tasks/FastQC.wdl +++ b/library/tasks/FastQC.wdl @@ -3,6 +3,9 @@ version 1.0 task FastQC { input { Array[File] fastq_files + File? limits_file + Int startRead = 250000 + Int nRead = 250000 String docker = "quay.io/biocontainers/fastqc:0.11.8--1" Int machine_mem_mb = 3850 Int disk = ceil(size(fastq_files, "Gi") * 2.2) @@ -10,18 +13,30 @@ task FastQC { } parameter_meta { - fastq_files : "input fastq files" - docker : "(optional) the docker image containing the runtime environment for this task" + fastq_files: "input fastq files" + limits_file: "(optional) limits file to use with fastqc" + startRead: "(optional) start fastqc at the nth read of the file" + nRead: "(optional) use (at most) n reads for fastqc" + docker: "(optional) the docker image containing the runtime environment for this task" disk: "(optional) the amount of disk space (GiB) to provision for this task" preemptible: "(optional) if non-zero, request a pre-emptible instance and allow for this number of preemptions before running the task on a non preemptible machine" machine_mem_mb: "(optional) the amount of memory (MiB) to provision for this task" + } command <<< set -e mkdir outputs - fastqc ~{sep=' ' fastq_files} -o outputs + declare -a fastqs=() + for fastq in ~{sep=' ' fastq_files} + do + outname=`basename $fastq .fastq.gz`_skip~{startRead}_read~{nRead}.fastq + zcat $fastq | head -n ~{4*(startRead + nRead)} | tail -n ~{4*nRead} > $outname + fastqs+=($outname) + done + + fastqc ${fastqs[@]} -o outputs ~{"--limits " + limits_file} >>> runtime { diff --git a/pipelines/optimus/Optimus.wdl b/pipelines/optimus/Optimus.wdl index 87d69beba..6bb9d790f 100644 --- a/pipelines/optimus/Optimus.wdl +++ b/pipelines/optimus/Optimus.wdl @@ -26,6 +26,9 @@ workflow Optimus { Array[File]? i1_fastq String sample_id + # fastqc input + File? fastqc_limits + # organism reference parameters File tar_star_reference File annotations_gtf @@ -52,6 +55,7 @@ workflow Optimus { r2_fastq: "reverse read, contains cDNA fragment generated from captured mRNA" i1_fastq: "(optional) index read, for demultiplexing of multiple samples on one flow cell." sample_id: "name of sample matching this file, inserted into read group header" + fastqc_limits: "(optional) limits file for fastqc" tar_star_reference: "star genome reference" annotations_gtf: "gtf containing annotations for gene tagging (must match star reference)" ref_genome_fasta: "genome fasta file (must match star reference)" @@ -81,7 +85,8 @@ workflow Optimus { call FastQC.FastQC as FastQC { input: - fastq_files = [r1_fastq[index], r2_fastq[index], non_optional_i1_fastq[index]] + fastq_files = [r1_fastq[index], r2_fastq[index], non_optional_i1_fastq[index]], + limits_file = fastqc_limits } } @@ -96,7 +101,8 @@ workflow Optimus { call FastQC.FastQC as FastQC { input: - fastq_files = [r1_fastq[index], r2_fastq[index]] + fastq_files = [r1_fastq[index], r2_fastq[index]], + limits_file = fastqc_limits } } diff --git a/pipelines/smartseq2_single_sample/SmartSeq2SingleSample.wdl b/pipelines/smartseq2_single_sample/SmartSeq2SingleSample.wdl index 4c8cd4c6b..78d473bfc 100644 --- a/pipelines/smartseq2_single_sample/SmartSeq2SingleSample.wdl +++ b/pipelines/smartseq2_single_sample/SmartSeq2SingleSample.wdl @@ -29,6 +29,8 @@ workflow SmartSeq2SingleCell { String output_name File fastq1 File fastq2 + # fastqc + File? fastqc_limits Int max_retries = 0 # whether to convert the outputs to Zarr format, by default it's set to true @@ -49,6 +51,7 @@ workflow SmartSeq2SingleCell { output_name: "Output name, can include path" fastq1: "R1 in paired end reads" fastq2: "R2 in paired end reads" + fastqc_limits: "(optional) limits file for fastqc" max_retries: "(optional) retry this number of times if task fails -- use with caution, see skylab README for details" output_zarr: "whether to run the taks that converts the outputs to Zarr format, by default it's true" } @@ -57,7 +60,8 @@ workflow SmartSeq2SingleCell { call FastQC.FastQC { input: - fastq_files = [fastq1, fastq2] + fastq_files = [fastq1, fastq2], + limits_file = fastqc_limits } call HISAT2.HISAT2PairedEnd { From 6f3bda6f1125a59a35b08fb4f37b5a3a2a4286eb Mon Sep 17 00:00:00 2001 From: Christopher Kachulis Date: Tue, 14 May 2019 15:05:50 -0400 Subject: [PATCH 04/15] adding dependencies to fix tests; reverting to draft2 --- library/tasks/FastQC.wdl | 33 +++++++++---------- pipelines/optimus/Optimus.wdl | 8 +++-- .../SmartSeq2SingleSample.wdl | 3 +- test/optimus/pr/dependencies.json | 3 +- test/optimus/scientific/dependencies.json | 3 +- .../pr/dependencies.json | 3 +- 6 files changed, 28 insertions(+), 25 deletions(-) diff --git a/library/tasks/FastQC.wdl b/library/tasks/FastQC.wdl index e8bdcd81d..4be0fe717 100644 --- a/library/tasks/FastQC.wdl +++ b/library/tasks/FastQC.wdl @@ -1,17 +1,14 @@ -version 1.0 - task FastQC { - input { - Array[File] fastq_files - File? limits_file - Int startRead = 250000 - Int nRead = 250000 - String docker = "quay.io/biocontainers/fastqc:0.11.8--1" - Int machine_mem_mb = 3850 - Int disk = ceil(size(fastq_files, "Gi") * 2.2) - Int preemptible = 3 - } - + Array[File] fastq_files + File? limits_file + Int startRead = 250000 + Int nRead = 250000 + String docker = "quay.io/biocontainers/fastqc:0.11.8--1" + Int machine_mem_mb = 3850 + Int disk = 100 + Int preemptible = 3 + + String dollar = "$" parameter_meta { fastq_files: "input fastq files" limits_file: "(optional) limits file to use with fastqc" @@ -29,14 +26,14 @@ task FastQC { mkdir outputs declare -a fastqs=() - for fastq in ~{sep=' ' fastq_files} + for fastq in ${sep=' ' fastq_files} do - outname=`basename $fastq .fastq.gz`_skip~{startRead}_read~{nRead}.fastq - zcat $fastq | head -n ~{4*(startRead + nRead)} | tail -n ~{4*nRead} > $outname - fastqs+=($outname) + outname=`basename ${dollar}fastq .fastq.gz`_skip${startRead}_read${nRead}.fastq + zcat ${dollar}fastq | head -n ${4*(startRead + nRead)} | tail -n ${4*nRead} > ${dollar}outname + fastqs+=(${dollar}outname) done - fastqc ${fastqs[@]} -o outputs ~{"--limits " + limits_file} + fastqc ${dollar}{fastqs[@]} -o outputs ${"--limits " + limits_file} >>> runtime { diff --git a/pipelines/optimus/Optimus.wdl b/pipelines/optimus/Optimus.wdl index 6bb9d790f..9c34075cc 100644 --- a/pipelines/optimus/Optimus.wdl +++ b/pipelines/optimus/Optimus.wdl @@ -86,7 +86,8 @@ workflow Optimus { call FastQC.FastQC as FastQC { input: fastq_files = [r1_fastq[index], r2_fastq[index], non_optional_i1_fastq[index]], - limits_file = fastqc_limits + limits_file = fastqc_limits, + disk = ceil((size(r1_fastq[index], "GiB") + size(r2_fastq[index], "GiB") + size(non_optional_i1_fastq[index], "GiB")) * 2.2) } } @@ -99,10 +100,11 @@ workflow Optimus { whitelist = whitelist } - call FastQC.FastQC as FastQC { + call FastQC.FastQC as FastQCNoIndex { input: fastq_files = [r1_fastq[index], r2_fastq[index]], - limits_file = fastqc_limits + limits_file = fastqc_limits, + disk = ceil((size(r1_fastq[index], "GiB") + size(r2_fastq[index], "GiB")) * 2.2) } } diff --git a/pipelines/smartseq2_single_sample/SmartSeq2SingleSample.wdl b/pipelines/smartseq2_single_sample/SmartSeq2SingleSample.wdl index 78d473bfc..8e85e15dd 100644 --- a/pipelines/smartseq2_single_sample/SmartSeq2SingleSample.wdl +++ b/pipelines/smartseq2_single_sample/SmartSeq2SingleSample.wdl @@ -61,7 +61,8 @@ workflow SmartSeq2SingleCell { call FastQC.FastQC { input: fastq_files = [fastq1, fastq2], - limits_file = fastqc_limits + limits_file = fastqc_limits, + disk = ceil((size(fastq1, "GiB") + size(fastq2, "GiB")) * 2.2) } call HISAT2.HISAT2PairedEnd { diff --git a/test/optimus/pr/dependencies.json b/test/optimus/pr/dependencies.json index e6add7ae1..b99094612 100644 --- a/test/optimus/pr/dependencies.json +++ b/test/optimus/pr/dependencies.json @@ -14,5 +14,6 @@ "RunEmptyDrops.wdl": "library/tasks/RunEmptyDrops.wdl", "ZarrUtils.wdl": "library/tasks/ZarrUtils.wdl", "Picard.wdl": "library/tasks/Picard.wdl", - "UmiCorrection.wdl": "library/tasks/UmiCorrection.wdl" + "UmiCorrection.wdl": "library/tasks/UmiCorrection.wdl", + "FastQC.wdl": "library/tasks/FastQC.wdl" } diff --git a/test/optimus/scientific/dependencies.json b/test/optimus/scientific/dependencies.json index 15a4b38d4..834f80ec4 100644 --- a/test/optimus/scientific/dependencies.json +++ b/test/optimus/scientific/dependencies.json @@ -12,5 +12,6 @@ "SequenceDataWithMoleculeTagMetrics.wdl": "library/tasks/SequenceDataWithMoleculeTagMetrics.wdl", "TagSortBam.wdl": "library/tasks/TagSortBam.wdl", "Picard.wdl": "library/tasks/Picard.wdl", - "UmiCorrection.wdl": "library/tasks/UmiCorrection.wdl" + "UmiCorrection.wdl": "library/tasks/UmiCorrection.wdl", + "FastQC.wdl": "library/tasks/FastQC.wdl" } diff --git a/test/smartseq2_single_sample/pr/dependencies.json b/test/smartseq2_single_sample/pr/dependencies.json index 2c10e0518..c01f27f4c 100644 --- a/test/smartseq2_single_sample/pr/dependencies.json +++ b/test/smartseq2_single_sample/pr/dependencies.json @@ -5,5 +5,6 @@ "Picard.wdl": "library/tasks/Picard.wdl", "RSEM.wdl": "library/tasks/RSEM.wdl", "GroupMetricsOutputs.wdl": "library/tasks/GroupMetricsOutputs.wdl", - "ZarrUtils.wdl": "library/tasks/ZarrUtils.wdl" + "ZarrUtils.wdl": "library/tasks/ZarrUtils.wdl", + "FastQC.wdl": "library/tasks/FastQC.wdl" } From 23e57ed096134121bdb2aaf92d8e38bed960e1e9 Mon Sep 17 00:00:00 2001 From: Christopher Kachulis Date: Tue, 14 May 2019 15:56:53 -0400 Subject: [PATCH 05/15] adjusting disk size calculations --- pipelines/optimus/Optimus.wdl | 4 ++-- pipelines/smartseq2_single_sample/SmartSeq2SingleSample.wdl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pipelines/optimus/Optimus.wdl b/pipelines/optimus/Optimus.wdl index 9c34075cc..726849296 100644 --- a/pipelines/optimus/Optimus.wdl +++ b/pipelines/optimus/Optimus.wdl @@ -87,7 +87,7 @@ workflow Optimus { input: fastq_files = [r1_fastq[index], r2_fastq[index], non_optional_i1_fastq[index]], limits_file = fastqc_limits, - disk = ceil((size(r1_fastq[index], "GiB") + size(r2_fastq[index], "GiB") + size(non_optional_i1_fastq[index], "GiB")) * 2.2) + disk = ceil((size(r1_fastq[index], "GiB") + size(r2_fastq[index], "GiB") + size(non_optional_i1_fastq[index], "GiB")) * 1.2 + 10) } } @@ -104,7 +104,7 @@ workflow Optimus { input: fastq_files = [r1_fastq[index], r2_fastq[index]], limits_file = fastqc_limits, - disk = ceil((size(r1_fastq[index], "GiB") + size(r2_fastq[index], "GiB")) * 2.2) + disk = ceil((size(r1_fastq[index], "GiB") + size(r2_fastq[index], "GiB")) * 1.2 + 10) } } diff --git a/pipelines/smartseq2_single_sample/SmartSeq2SingleSample.wdl b/pipelines/smartseq2_single_sample/SmartSeq2SingleSample.wdl index 8e85e15dd..0c4264674 100644 --- a/pipelines/smartseq2_single_sample/SmartSeq2SingleSample.wdl +++ b/pipelines/smartseq2_single_sample/SmartSeq2SingleSample.wdl @@ -62,7 +62,7 @@ workflow SmartSeq2SingleCell { input: fastq_files = [fastq1, fastq2], limits_file = fastqc_limits, - disk = ceil((size(fastq1, "GiB") + size(fastq2, "GiB")) * 2.2) + disk = ceil((size(fastq1, "GiB") + size(fastq2, "GiB")) * 1.2 + 10) } call HISAT2.HISAT2PairedEnd { From 2436d2f477fad041bdb1afc2341b301241137fa2 Mon Sep 17 00:00:00 2001 From: Christopher Kachulis Date: Wed, 15 May 2019 13:30:09 -0400 Subject: [PATCH 06/15] tests: --- pipelines/optimus/Optimus.wdl | 4 ++++ .../SmartSeq2SingleSample.wdl | 4 ++++ test/optimus/pr/ValidateOptimus.wdl | 18 ++++++++++++++++++ test/optimus/pr/test_inputs.json | 6 ++++++ test/optimus/pr/test_optimus_PR.wdl | 8 +++++++- .../pr/ValidateSmartSeq2SingleCell.wdl | 19 +++++++++++++++++++ .../pr/test_inputs.json | 4 +++- .../pr/test_smartseq2_single_sample_PR.wdl | 7 ++++++- 8 files changed, 67 insertions(+), 3 deletions(-) diff --git a/pipelines/optimus/Optimus.wdl b/pipelines/optimus/Optimus.wdl index 726849296..7642b5423 100644 --- a/pipelines/optimus/Optimus.wdl +++ b/pipelines/optimus/Optimus.wdl @@ -241,5 +241,9 @@ workflow Optimus { # zarr Array[File]? zarr_output_files = OptimusZarrConversion.zarr_output_files + + # fastqc + Array[Array[File]] fastqc_htmls = select_first(FastQC.fastqc_htmls,FastQCNoIndex.fastqc_htmls) + Array[Array[File]] fastqc_zips = select_first(FastQC.fastqc_zips,FastQCNoIndex.fastqc_zips) } } diff --git a/pipelines/smartseq2_single_sample/SmartSeq2SingleSample.wdl b/pipelines/smartseq2_single_sample/SmartSeq2SingleSample.wdl index 0c4264674..ec4159f63 100644 --- a/pipelines/smartseq2_single_sample/SmartSeq2SingleSample.wdl +++ b/pipelines/smartseq2_single_sample/SmartSeq2SingleSample.wdl @@ -160,5 +160,9 @@ workflow SmartSeq2SingleCell { # zarr Array[File]? zarr_output_files = SmartSeq2ZarrConversion.zarr_output_files + + #fastqc + Array[File] fastqc_htmls = FastQC.fastqc_htmls + Array[File] fastqc_zips = FastQC.fastqc_zips } } diff --git a/test/optimus/pr/ValidateOptimus.wdl b/test/optimus/pr/ValidateOptimus.wdl index 34ed4f321..467dda8d2 100644 --- a/test/optimus/pr/ValidateOptimus.wdl +++ b/test/optimus/pr/ValidateOptimus.wdl @@ -3,6 +3,8 @@ task ValidateOptimus { File matrix File gene_metrics File cell_metrics + Array[File] fastqc_htmls + Array[File] fastqc_zips Int required_disk = ceil((size(bam, "G") + size(matrix, "G")) * 1.1) @@ -10,6 +12,8 @@ task ValidateOptimus { String expected_matrix_hash String expected_gene_metric_hash String expected_cell_metric_hash + Array[String] expected_fastqc_html_hashes + Array[String] expected_fastqc_zip_hashes command <<< @@ -51,6 +55,20 @@ task ValidateOptimus { fail=true fi + for hmtl in ${sep=' ' fastqc_htmls}; do + hash=$(md5sum $html | awk '{print $1}') + if [[ " ${sep=' ' expected_fastqc_html_hashes} " != *" $hash "* ]]; then + fail=true + fi + done + + for zipfile in ${sep=' ' fastqc_zips}; do + hash=$(md5sum $zipfile | awk '{print $1}') + if [[ " ${sep=' ' expected_fastqc_zip_hashes} " != *" $hash "* ]]; then + fail=true + fi + done + if [ $fail == "true" ]; then exit 1; fi >>> diff --git a/test/optimus/pr/test_inputs.json b/test/optimus/pr/test_inputs.json index 0e33ab9c6..e9d8d811f 100644 --- a/test/optimus/pr/test_inputs.json +++ b/test/optimus/pr/test_inputs.json @@ -3,6 +3,12 @@ "TestOptimusPR.expected_matrix_hash": "aec7a79dc7b85a5d621509a3f4fa2192", "TestOptimusPR.expected_cell_metric_hash": "45cc8be253445201b02d102d2d096a0c", "TestOptimusPR.expected_gene_metric_hash": "d636671dfcff6ec8068987d0f5780334", + "TestOptimusPR.expected_fastqc_html_hashes": ["cb0bbb7d52198c38f2cfa87621c92e31", + "6697992484d279d15683b5124ab75554", + "26e07b94eaff3a7ea3dcc9a191bd1194"], + "TestOptimusPR.expected_fastqc_zip_hashes": ["5fde235436cf1f3b207bc8dba2f1d268", + "0f670305b6e5433feb449978ef23e7e4", + "75cec13e04808d13a874bea44f439dd5"], "TestOptimusPR.r1_fastq": [ "gs://hca-dcp-mint-test-data/10x/demo/fastqs/pbmc8k_S1_L007_R1_001.fastq.gz", "gs://hca-dcp-mint-test-data/10x/demo/fastqs/pbmc8k_S1_L007_R1_001.fastq.gz" diff --git a/test/optimus/pr/test_optimus_PR.wdl b/test/optimus/pr/test_optimus_PR.wdl index 4d097694e..4c34470a7 100644 --- a/test/optimus/pr/test_optimus_PR.wdl +++ b/test/optimus/pr/test_optimus_PR.wdl @@ -10,6 +10,8 @@ workflow TestOptimusPR { String expected_matrix_hash String expected_gene_metric_hash String expected_cell_metric_hash + Array[String] expected_fastqc_html_hashes + Array[String] expected_fastqc_zip_hashes # Optimus inputs Array[File] r1_fastq @@ -40,10 +42,14 @@ workflow TestOptimusPR { matrix = target.matrix, gene_metrics = target.gene_metrics, cell_metrics = target.cell_metrics, + fastqc_htmls = flatten(target.fastqc_htmls), + fastqc_zips = flatten(target.fastqc_zips), expected_bam_hash = expected_bam_hash, expected_matrix_hash = expected_matrix_hash, expected_cell_metric_hash = expected_cell_metric_hash, - expected_gene_metric_hash = expected_gene_metric_hash + expected_gene_metric_hash = expected_gene_metric_hash, + expected_fastqc_html_hashes = expected_fastqc_html_hashes, + expected_fastqc_zip_hashes = expected_fastqc_zip_hashes } } diff --git a/test/smartseq2_single_sample/pr/ValidateSmartSeq2SingleCell.wdl b/test/smartseq2_single_sample/pr/ValidateSmartSeq2SingleCell.wdl index 4f0244148..ff721c914 100644 --- a/test/smartseq2_single_sample/pr/ValidateSmartSeq2SingleCell.wdl +++ b/test/smartseq2_single_sample/pr/ValidateSmartSeq2SingleCell.wdl @@ -5,6 +5,11 @@ task ValidateSmartSeq2SingleCell { File target_metrics String expected_metrics_hash + Array[File] fastqc_zips + Array[File] fastqc_htmls + Array[String] expected_fastqc_zip_hashes + Array[String] expected_fastqc_html_hashes + command <<< # catch intermittent failures @@ -28,6 +33,20 @@ task ValidateSmartSeq2SingleCell { fail=true fi + for hmtl in ${sep=' ' fastqc_htmls}; do + hash=$(md5sum $html | awk '{print $1}') + if [[ " ${sep=' ' expected_fastqc_html_hashes} " != *" $hash "* ]]; then + fail=true + fi + done + + for zipfile in ${sep=' ' fastqc_zips}; do + hash=$(md5sum $zipfile | awk '{print $1}') + if [[ " ${sep=' ' expected_fastqc_zip_hashes} " != *" $hash "* ]]; then + fail=true + fi + done + if [ $fail == "true" ]; then exit 1; fi >>> diff --git a/test/smartseq2_single_sample/pr/test_inputs.json b/test/smartseq2_single_sample/pr/test_inputs.json index b28535cba..d689cf26b 100644 --- a/test/smartseq2_single_sample/pr/test_inputs.json +++ b/test/smartseq2_single_sample/pr/test_inputs.json @@ -14,5 +14,7 @@ "TestSmartSeq2SingleCellPR.sample_name":"SRR1294925", "TestSmartSeq2SingleCellPR.output_name":"SRR1294925", "TestSmartSeq2SingleCellPR.expected_counts_hash": "135a3fbb959583db17713dc8b9d7fe33", - "TestSmartSeq2SingleCellPR.expected_metrics_hash": "99bd9903ac8dc77eb1c047ffa8eb42ed" + "TestSmartSeq2SingleCellPR.expected_metrics_hash": "99bd9903ac8dc77eb1c047ffa8eb42ed", + "TestSmartSeq2SingleCellPR.expected_fastqc_zip_hashes": ["2080c23507ce431aab4491f8aecfff39","7ad3130042307ca8db85e228e1a30f77"], + "TestSmartSeq2SingleCellPR.expected_fastqc_html_hashes": ["7e126084d99342c7dd78e7b80429d9f3","26934543205d10a70488ba7920557e42"] } diff --git a/test/smartseq2_single_sample/pr/test_smartseq2_single_sample_PR.wdl b/test/smartseq2_single_sample/pr/test_smartseq2_single_sample_PR.wdl index 75b7826f8..e3f29fb38 100644 --- a/test/smartseq2_single_sample/pr/test_smartseq2_single_sample_PR.wdl +++ b/test/smartseq2_single_sample/pr/test_smartseq2_single_sample_PR.wdl @@ -23,6 +23,8 @@ workflow TestSmartSeq2SingleCellPR { String output_name File fastq1 File fastq2 + Array[String] expected_fastqc_zip_hashes + Array[String] expected_fastqc_html_hashes call target_wdl.SmartSeq2SingleCell as target_workflow { input: @@ -47,7 +49,10 @@ workflow TestSmartSeq2SingleCellPR { counts = target_workflow.rsem_gene_results, expected_counts_hash = expected_counts_hash, target_metrics = target_workflow.insert_size_metrics, - expected_metrics_hash = expected_metrics_hash + expected_metrics_hash = expected_metrics_hash, + fastqc_htmls = flatten(target_workflow.fastqc_htmls), + fastqc_zips = flatten(target_workflow.fastqc_zips), + } } From 0905e0458b6d0a6fbc196321d67bb62e79e2acce Mon Sep 17 00:00:00 2001 From: Christopher Kachulis Date: Wed, 15 May 2019 13:43:17 -0400 Subject: [PATCH 07/15] fixing tests --- pipelines/optimus/Optimus.wdl | 4 ++-- .../pr/test_smartseq2_single_sample_PR.wdl | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pipelines/optimus/Optimus.wdl b/pipelines/optimus/Optimus.wdl index 7642b5423..eb5dbcc42 100644 --- a/pipelines/optimus/Optimus.wdl +++ b/pipelines/optimus/Optimus.wdl @@ -243,7 +243,7 @@ workflow Optimus { Array[File]? zarr_output_files = OptimusZarrConversion.zarr_output_files # fastqc - Array[Array[File]] fastqc_htmls = select_first(FastQC.fastqc_htmls,FastQCNoIndex.fastqc_htmls) - Array[Array[File]] fastqc_zips = select_first(FastQC.fastqc_zips,FastQCNoIndex.fastqc_zips) + Array[Array[File]] fastqc_htmls = select_first([FastQC.fastqc_htmls,FastQCNoIndex.fastqc_htmls]) + Array[Array[File]] fastqc_zips = select_first([FastQC.fastqc_zips,FastQCNoIndex.fastqc_zips]) } } diff --git a/test/smartseq2_single_sample/pr/test_smartseq2_single_sample_PR.wdl b/test/smartseq2_single_sample/pr/test_smartseq2_single_sample_PR.wdl index e3f29fb38..ba553a42e 100644 --- a/test/smartseq2_single_sample/pr/test_smartseq2_single_sample_PR.wdl +++ b/test/smartseq2_single_sample/pr/test_smartseq2_single_sample_PR.wdl @@ -50,9 +50,8 @@ workflow TestSmartSeq2SingleCellPR { expected_counts_hash = expected_counts_hash, target_metrics = target_workflow.insert_size_metrics, expected_metrics_hash = expected_metrics_hash, - fastqc_htmls = flatten(target_workflow.fastqc_htmls), - fastqc_zips = flatten(target_workflow.fastqc_zips), - + fastqc_htmls = target_workflow.fastqc_htmls, + fastqc_zips = target_workflow.fastqc_zips } } From 5adc1a62eba426be8c0b61d9d49de73a9cf3ceea Mon Sep 17 00:00:00 2001 From: Christopher Kachulis Date: Wed, 15 May 2019 13:59:25 -0400 Subject: [PATCH 08/15] fixing tests --- pipelines/optimus/Optimus.wdl | 6 ++++-- .../pr/test_smartseq2_single_sample_PR.wdl | 4 +++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/pipelines/optimus/Optimus.wdl b/pipelines/optimus/Optimus.wdl index eb5dbcc42..8490bedb7 100644 --- a/pipelines/optimus/Optimus.wdl +++ b/pipelines/optimus/Optimus.wdl @@ -109,6 +109,8 @@ workflow Optimus { } File barcoded_bam = select_first([AttachBarcodes.bam_output, AttachBarcodesNoIndex.bam_output]) + Array[File] fastqc_output_htmls = select_first([FastQC.fastqc_htmls,FastQCNoIndex.fastqc_htmls]) + Array[File] fastqc_output_zips = select_first([FastQC.fastqc_zips,FastQCNoIndex.fastqc_zips]) } call Merge.MergeSortBamFiles as MergeUnsorted { @@ -243,7 +245,7 @@ workflow Optimus { Array[File]? zarr_output_files = OptimusZarrConversion.zarr_output_files # fastqc - Array[Array[File]] fastqc_htmls = select_first([FastQC.fastqc_htmls,FastQCNoIndex.fastqc_htmls]) - Array[Array[File]] fastqc_zips = select_first([FastQC.fastqc_zips,FastQCNoIndex.fastqc_zips]) + Array[Array[File]] fastqc_htmls = fastqc_output_htmls + Array[Array[File]] fastqc_zips = fastqc_output_zips } } diff --git a/test/smartseq2_single_sample/pr/test_smartseq2_single_sample_PR.wdl b/test/smartseq2_single_sample/pr/test_smartseq2_single_sample_PR.wdl index ba553a42e..bf00ce994 100644 --- a/test/smartseq2_single_sample/pr/test_smartseq2_single_sample_PR.wdl +++ b/test/smartseq2_single_sample/pr/test_smartseq2_single_sample_PR.wdl @@ -51,7 +51,9 @@ workflow TestSmartSeq2SingleCellPR { target_metrics = target_workflow.insert_size_metrics, expected_metrics_hash = expected_metrics_hash, fastqc_htmls = target_workflow.fastqc_htmls, - fastqc_zips = target_workflow.fastqc_zips + fastqc_zips = target_workflow.fastqc_zips, + expected_fastqc_zip_hashes = expected_fastqc_zip_hashes, + expected_fastqc_html_hashes = expected_fastqc_html_hashes } } From e8669094806e723ef457e0c2bc4f8a765d2b3c37 Mon Sep 17 00:00:00 2001 From: Christopher Kachulis Date: Wed, 15 May 2019 14:41:35 -0400 Subject: [PATCH 09/15] tests --- test/optimus/pr/ValidateOptimus.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/optimus/pr/ValidateOptimus.wdl b/test/optimus/pr/ValidateOptimus.wdl index 467dda8d2..8c17aea34 100644 --- a/test/optimus/pr/ValidateOptimus.wdl +++ b/test/optimus/pr/ValidateOptimus.wdl @@ -58,6 +58,7 @@ task ValidateOptimus { for hmtl in ${sep=' ' fastqc_htmls}; do hash=$(md5sum $html | awk '{print $1}') if [[ " ${sep=' ' expected_fastqc_html_hashes} " != *" $hash "* ]]; then + >&2 echo "fastq_html_hash ($hash) did not match expected hash (${expected_fastqc_html_hashes})" fail=true fi done @@ -65,6 +66,7 @@ task ValidateOptimus { for zipfile in ${sep=' ' fastqc_zips}; do hash=$(md5sum $zipfile | awk '{print $1}') if [[ " ${sep=' ' expected_fastqc_zip_hashes} " != *" $hash "* ]]; then + >&2 echo "fastq_zip_hash ($hash) did not match expected hash (${expected_fastqc_zip_hashes})" fail=true fi done From 78b4ef324d896eb4b95dd76bfb4344f9d31e9936 Mon Sep 17 00:00:00 2001 From: Christopher Kachulis Date: Wed, 15 May 2019 16:24:16 -0400 Subject: [PATCH 10/15] tests --- test/optimus/pr/ValidateOptimus.wdl | 8 ++++---- test/optimus/pr/test_inputs.json | 6 +++--- .../pr/ValidateSmartSeq2SingleCell.wdl | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/test/optimus/pr/ValidateOptimus.wdl b/test/optimus/pr/ValidateOptimus.wdl index 8c17aea34..2545a2eea 100644 --- a/test/optimus/pr/ValidateOptimus.wdl +++ b/test/optimus/pr/ValidateOptimus.wdl @@ -55,10 +55,10 @@ task ValidateOptimus { fail=true fi - for hmtl in ${sep=' ' fastqc_htmls}; do - hash=$(md5sum $html | awk '{print $1}') + for htmlfile in ${sep=' ' fastqc_htmls}; do + hash=$(md5sum $htmlfile | awk '{print $1}') if [[ " ${sep=' ' expected_fastqc_html_hashes} " != *" $hash "* ]]; then - >&2 echo "fastq_html_hash ($hash) did not match expected hash (${expected_fastqc_html_hashes})" + >&2 echo "fastq_html_hash ($hash) did not match expected hash (${sep=' ' expected_fastqc_html_hashes})" fail=true fi done @@ -66,7 +66,7 @@ task ValidateOptimus { for zipfile in ${sep=' ' fastqc_zips}; do hash=$(md5sum $zipfile | awk '{print $1}') if [[ " ${sep=' ' expected_fastqc_zip_hashes} " != *" $hash "* ]]; then - >&2 echo "fastq_zip_hash ($hash) did not match expected hash (${expected_fastqc_zip_hashes})" + >&2 echo "fastq_zip_hash ($hash) did not match expected hash (${sep=' ' expected_fastqc_zip_hashes})" fail=true fi done diff --git a/test/optimus/pr/test_inputs.json b/test/optimus/pr/test_inputs.json index e9d8d811f..61a82d7a9 100644 --- a/test/optimus/pr/test_inputs.json +++ b/test/optimus/pr/test_inputs.json @@ -6,9 +6,9 @@ "TestOptimusPR.expected_fastqc_html_hashes": ["cb0bbb7d52198c38f2cfa87621c92e31", "6697992484d279d15683b5124ab75554", "26e07b94eaff3a7ea3dcc9a191bd1194"], - "TestOptimusPR.expected_fastqc_zip_hashes": ["5fde235436cf1f3b207bc8dba2f1d268", - "0f670305b6e5433feb449978ef23e7e4", - "75cec13e04808d13a874bea44f439dd5"], + "TestOptimusPR.expected_fastqc_zip_hashes": ["5e26169ee4d626328146065e192a966f", + "df5188daf7bb40e2915caca6213e3585", + "b11a50d4b0945fe6c49a84dbe3181aea"], "TestOptimusPR.r1_fastq": [ "gs://hca-dcp-mint-test-data/10x/demo/fastqs/pbmc8k_S1_L007_R1_001.fastq.gz", "gs://hca-dcp-mint-test-data/10x/demo/fastqs/pbmc8k_S1_L007_R1_001.fastq.gz" diff --git a/test/smartseq2_single_sample/pr/ValidateSmartSeq2SingleCell.wdl b/test/smartseq2_single_sample/pr/ValidateSmartSeq2SingleCell.wdl index ff721c914..85a22c0bf 100644 --- a/test/smartseq2_single_sample/pr/ValidateSmartSeq2SingleCell.wdl +++ b/test/smartseq2_single_sample/pr/ValidateSmartSeq2SingleCell.wdl @@ -33,8 +33,8 @@ task ValidateSmartSeq2SingleCell { fail=true fi - for hmtl in ${sep=' ' fastqc_htmls}; do - hash=$(md5sum $html | awk '{print $1}') + for htmlfile in ${sep=' ' fastqc_htmls}; do + hash=$(md5sum $htmlfile | awk '{print $1}') if [[ " ${sep=' ' expected_fastqc_html_hashes} " != *" $hash "* ]]; then fail=true fi From b2965330cd4dcd8122cf83bfdea088cca4a174da Mon Sep 17 00:00:00 2001 From: Christopher Kachulis Date: Wed, 15 May 2019 17:50:27 -0400 Subject: [PATCH 11/15] tests should now pass... --- test/optimus/pr/ValidateOptimus.wdl | 11 ----------- test/optimus/pr/test_inputs.json | 3 --- test/optimus/pr/test_optimus_PR.wdl | 5 +---- .../pr/ValidateSmartSeq2SingleCell.wdl | 10 +--------- test/smartseq2_single_sample/pr/test_inputs.json | 3 +-- .../pr/test_smartseq2_single_sample_PR.wdl | 3 --- 6 files changed, 3 insertions(+), 32 deletions(-) diff --git a/test/optimus/pr/ValidateOptimus.wdl b/test/optimus/pr/ValidateOptimus.wdl index 2545a2eea..79b757b15 100644 --- a/test/optimus/pr/ValidateOptimus.wdl +++ b/test/optimus/pr/ValidateOptimus.wdl @@ -4,8 +4,6 @@ task ValidateOptimus { File gene_metrics File cell_metrics Array[File] fastqc_htmls - Array[File] fastqc_zips - Int required_disk = ceil((size(bam, "G") + size(matrix, "G")) * 1.1) String expected_bam_hash @@ -13,7 +11,6 @@ task ValidateOptimus { String expected_gene_metric_hash String expected_cell_metric_hash Array[String] expected_fastqc_html_hashes - Array[String] expected_fastqc_zip_hashes command <<< @@ -63,14 +60,6 @@ task ValidateOptimus { fi done - for zipfile in ${sep=' ' fastqc_zips}; do - hash=$(md5sum $zipfile | awk '{print $1}') - if [[ " ${sep=' ' expected_fastqc_zip_hashes} " != *" $hash "* ]]; then - >&2 echo "fastq_zip_hash ($hash) did not match expected hash (${sep=' ' expected_fastqc_zip_hashes})" - fail=true - fi - done - if [ $fail == "true" ]; then exit 1; fi >>> diff --git a/test/optimus/pr/test_inputs.json b/test/optimus/pr/test_inputs.json index 61a82d7a9..fd8f4569f 100644 --- a/test/optimus/pr/test_inputs.json +++ b/test/optimus/pr/test_inputs.json @@ -6,9 +6,6 @@ "TestOptimusPR.expected_fastqc_html_hashes": ["cb0bbb7d52198c38f2cfa87621c92e31", "6697992484d279d15683b5124ab75554", "26e07b94eaff3a7ea3dcc9a191bd1194"], - "TestOptimusPR.expected_fastqc_zip_hashes": ["5e26169ee4d626328146065e192a966f", - "df5188daf7bb40e2915caca6213e3585", - "b11a50d4b0945fe6c49a84dbe3181aea"], "TestOptimusPR.r1_fastq": [ "gs://hca-dcp-mint-test-data/10x/demo/fastqs/pbmc8k_S1_L007_R1_001.fastq.gz", "gs://hca-dcp-mint-test-data/10x/demo/fastqs/pbmc8k_S1_L007_R1_001.fastq.gz" diff --git a/test/optimus/pr/test_optimus_PR.wdl b/test/optimus/pr/test_optimus_PR.wdl index 4c34470a7..a5a34f27e 100644 --- a/test/optimus/pr/test_optimus_PR.wdl +++ b/test/optimus/pr/test_optimus_PR.wdl @@ -11,7 +11,6 @@ workflow TestOptimusPR { String expected_gene_metric_hash String expected_cell_metric_hash Array[String] expected_fastqc_html_hashes - Array[String] expected_fastqc_zip_hashes # Optimus inputs Array[File] r1_fastq @@ -43,13 +42,11 @@ workflow TestOptimusPR { gene_metrics = target.gene_metrics, cell_metrics = target.cell_metrics, fastqc_htmls = flatten(target.fastqc_htmls), - fastqc_zips = flatten(target.fastqc_zips), expected_bam_hash = expected_bam_hash, expected_matrix_hash = expected_matrix_hash, expected_cell_metric_hash = expected_cell_metric_hash, expected_gene_metric_hash = expected_gene_metric_hash, - expected_fastqc_html_hashes = expected_fastqc_html_hashes, - expected_fastqc_zip_hashes = expected_fastqc_zip_hashes + expected_fastqc_html_hashes = expected_fastqc_html_hashes } } diff --git a/test/smartseq2_single_sample/pr/ValidateSmartSeq2SingleCell.wdl b/test/smartseq2_single_sample/pr/ValidateSmartSeq2SingleCell.wdl index 85a22c0bf..6e8e8a92a 100644 --- a/test/smartseq2_single_sample/pr/ValidateSmartSeq2SingleCell.wdl +++ b/test/smartseq2_single_sample/pr/ValidateSmartSeq2SingleCell.wdl @@ -5,9 +5,7 @@ task ValidateSmartSeq2SingleCell { File target_metrics String expected_metrics_hash - Array[File] fastqc_zips Array[File] fastqc_htmls - Array[String] expected_fastqc_zip_hashes Array[String] expected_fastqc_html_hashes command <<< @@ -36,17 +34,11 @@ task ValidateSmartSeq2SingleCell { for htmlfile in ${sep=' ' fastqc_htmls}; do hash=$(md5sum $htmlfile | awk '{print $1}') if [[ " ${sep=' ' expected_fastqc_html_hashes} " != *" $hash "* ]]; then + >&2 echo "fastq_html_hash ($hash) did not match expected hash (${sep=' ' expected_fastqc_html_hashes})" fail=true fi done - for zipfile in ${sep=' ' fastqc_zips}; do - hash=$(md5sum $zipfile | awk '{print $1}') - if [[ " ${sep=' ' expected_fastqc_zip_hashes} " != *" $hash "* ]]; then - fail=true - fi - done - if [ $fail == "true" ]; then exit 1; fi >>> diff --git a/test/smartseq2_single_sample/pr/test_inputs.json b/test/smartseq2_single_sample/pr/test_inputs.json index d689cf26b..d29d475a5 100644 --- a/test/smartseq2_single_sample/pr/test_inputs.json +++ b/test/smartseq2_single_sample/pr/test_inputs.json @@ -15,6 +15,5 @@ "TestSmartSeq2SingleCellPR.output_name":"SRR1294925", "TestSmartSeq2SingleCellPR.expected_counts_hash": "135a3fbb959583db17713dc8b9d7fe33", "TestSmartSeq2SingleCellPR.expected_metrics_hash": "99bd9903ac8dc77eb1c047ffa8eb42ed", - "TestSmartSeq2SingleCellPR.expected_fastqc_zip_hashes": ["2080c23507ce431aab4491f8aecfff39","7ad3130042307ca8db85e228e1a30f77"], - "TestSmartSeq2SingleCellPR.expected_fastqc_html_hashes": ["7e126084d99342c7dd78e7b80429d9f3","26934543205d10a70488ba7920557e42"] + "TestSmartSeq2SingleCellPR.expected_fastqc_html_hashes": ["21aac025893e0488d6dac0cd206ac2a9","d0d16bee2e05441acd8189d8e29849e7"] } diff --git a/test/smartseq2_single_sample/pr/test_smartseq2_single_sample_PR.wdl b/test/smartseq2_single_sample/pr/test_smartseq2_single_sample_PR.wdl index bf00ce994..06ddf8b6b 100644 --- a/test/smartseq2_single_sample/pr/test_smartseq2_single_sample_PR.wdl +++ b/test/smartseq2_single_sample/pr/test_smartseq2_single_sample_PR.wdl @@ -23,7 +23,6 @@ workflow TestSmartSeq2SingleCellPR { String output_name File fastq1 File fastq2 - Array[String] expected_fastqc_zip_hashes Array[String] expected_fastqc_html_hashes call target_wdl.SmartSeq2SingleCell as target_workflow { @@ -51,8 +50,6 @@ workflow TestSmartSeq2SingleCellPR { target_metrics = target_workflow.insert_size_metrics, expected_metrics_hash = expected_metrics_hash, fastqc_htmls = target_workflow.fastqc_htmls, - fastqc_zips = target_workflow.fastqc_zips, - expected_fastqc_zip_hashes = expected_fastqc_zip_hashes, expected_fastqc_html_hashes = expected_fastqc_html_hashes } From abb02f0d6278845435b616952e755fbe23827c15 Mon Sep 17 00:00:00 2001 From: Christopher Kachulis Date: Thu, 16 May 2019 10:27:11 -0400 Subject: [PATCH 12/15] Phil comments --- pipelines/optimus/Optimus.wdl | 4 ++-- test/optimus/pr/ValidateOptimus.wdl | 7 +++++++ test/optimus/pr/test_inputs.json | 1 + test/optimus/pr/test_optimus_PR.wdl | 7 +++++-- .../pr/ValidateSmartSeq2SingleCell.wdl | 20 +++++++++++++------ .../pr/test_smartseq2_single_sample_PR.wdl | 5 ++++- 6 files changed, 33 insertions(+), 11 deletions(-) diff --git a/pipelines/optimus/Optimus.wdl b/pipelines/optimus/Optimus.wdl index 8490bedb7..c79aafcd5 100644 --- a/pipelines/optimus/Optimus.wdl +++ b/pipelines/optimus/Optimus.wdl @@ -245,7 +245,7 @@ workflow Optimus { Array[File]? zarr_output_files = OptimusZarrConversion.zarr_output_files # fastqc - Array[Array[File]] fastqc_htmls = fastqc_output_htmls - Array[Array[File]] fastqc_zips = fastqc_output_zips + Array[File] fastqc_htmls = flatten(fastqc_output_htmls) + Array[File] fastqc_zips = flatten(fastqc_output_zips) } } diff --git a/test/optimus/pr/ValidateOptimus.wdl b/test/optimus/pr/ValidateOptimus.wdl index 79b757b15..0762a8130 100644 --- a/test/optimus/pr/ValidateOptimus.wdl +++ b/test/optimus/pr/ValidateOptimus.wdl @@ -4,12 +4,14 @@ task ValidateOptimus { File gene_metrics File cell_metrics Array[File] fastqc_htmls + Int n_fastqc_zips Int required_disk = ceil((size(bam, "G") + size(matrix, "G")) * 1.1) String expected_bam_hash String expected_matrix_hash String expected_gene_metric_hash String expected_cell_metric_hash + Int expected_n_fastqc_zips Array[String] expected_fastqc_html_hashes command <<< @@ -60,6 +62,11 @@ task ValidateOptimus { fi done + if [ ${expected_n_fastqc_zips} != ${n_fastqc_zips} ]; then + >&2 echo "number of fastqc zip (${n_fastqc_zips}) did not match expected number (${expected_n_fastqc_zips})" + fail=true + fi + if [ $fail == "true" ]; then exit 1; fi >>> diff --git a/test/optimus/pr/test_inputs.json b/test/optimus/pr/test_inputs.json index fd8f4569f..d70337c03 100644 --- a/test/optimus/pr/test_inputs.json +++ b/test/optimus/pr/test_inputs.json @@ -6,6 +6,7 @@ "TestOptimusPR.expected_fastqc_html_hashes": ["cb0bbb7d52198c38f2cfa87621c92e31", "6697992484d279d15683b5124ab75554", "26e07b94eaff3a7ea3dcc9a191bd1194"], + "TestOptimusPR.expected_n_fastqc_zips": 6, "TestOptimusPR.r1_fastq": [ "gs://hca-dcp-mint-test-data/10x/demo/fastqs/pbmc8k_S1_L007_R1_001.fastq.gz", "gs://hca-dcp-mint-test-data/10x/demo/fastqs/pbmc8k_S1_L007_R1_001.fastq.gz" diff --git a/test/optimus/pr/test_optimus_PR.wdl b/test/optimus/pr/test_optimus_PR.wdl index a5a34f27e..dc09444f9 100644 --- a/test/optimus/pr/test_optimus_PR.wdl +++ b/test/optimus/pr/test_optimus_PR.wdl @@ -11,6 +11,7 @@ workflow TestOptimusPR { String expected_gene_metric_hash String expected_cell_metric_hash Array[String] expected_fastqc_html_hashes + Int expected_n_fastqc_zips # Optimus inputs Array[File] r1_fastq @@ -41,12 +42,14 @@ workflow TestOptimusPR { matrix = target.matrix, gene_metrics = target.gene_metrics, cell_metrics = target.cell_metrics, - fastqc_htmls = flatten(target.fastqc_htmls), + fastqc_htmls = target.fastqc_htmls, + n_fastq_zips = length(target.fastqc_zips), expected_bam_hash = expected_bam_hash, expected_matrix_hash = expected_matrix_hash, expected_cell_metric_hash = expected_cell_metric_hash, expected_gene_metric_hash = expected_gene_metric_hash, - expected_fastqc_html_hashes = expected_fastqc_html_hashes + expected_fastqc_html_hashes = expected_fastqc_html_hashes, + expected_n_fastqc_zips = expected_n_fastqc_zips } } diff --git a/test/smartseq2_single_sample/pr/ValidateSmartSeq2SingleCell.wdl b/test/smartseq2_single_sample/pr/ValidateSmartSeq2SingleCell.wdl index 6e8e8a92a..4974aa12d 100644 --- a/test/smartseq2_single_sample/pr/ValidateSmartSeq2SingleCell.wdl +++ b/test/smartseq2_single_sample/pr/ValidateSmartSeq2SingleCell.wdl @@ -8,6 +8,9 @@ task ValidateSmartSeq2SingleCell { Array[File] fastqc_htmls Array[String] expected_fastqc_html_hashes + Int expected_n_fastqc_zips + Int n_fastqc_zips + command <<< # catch intermittent failures @@ -32,12 +35,17 @@ task ValidateSmartSeq2SingleCell { fi for htmlfile in ${sep=' ' fastqc_htmls}; do - hash=$(md5sum $htmlfile | awk '{print $1}') - if [[ " ${sep=' ' expected_fastqc_html_hashes} " != *" $hash "* ]]; then - >&2 echo "fastq_html_hash ($hash) did not match expected hash (${sep=' ' expected_fastqc_html_hashes})" - fail=true - fi - done + hash=$(md5sum $htmlfile | awk '{print $1}') + if [[ " ${sep=' ' expected_fastqc_html_hashes} " != *" $hash "* ]]; then + >&2 echo "fastq_html_hash ($hash) did not match expected hash (${sep=' ' expected_fastqc_html_hashes})" + fail=true + fi + done + + if [ ${expected_n_fastqc_zips} != ${n_fastqc_zips} ]; then + >&2 echo "number of fastqc zip (${n_fastqc_zips}) did not match expected number (${expected_n_fastqc_zips})" + fail=true + fi if [ $fail == "true" ]; then exit 1; fi diff --git a/test/smartseq2_single_sample/pr/test_smartseq2_single_sample_PR.wdl b/test/smartseq2_single_sample/pr/test_smartseq2_single_sample_PR.wdl index 06ddf8b6b..5fdc9d960 100644 --- a/test/smartseq2_single_sample/pr/test_smartseq2_single_sample_PR.wdl +++ b/test/smartseq2_single_sample/pr/test_smartseq2_single_sample_PR.wdl @@ -24,6 +24,7 @@ workflow TestSmartSeq2SingleCellPR { File fastq1 File fastq2 Array[String] expected_fastqc_html_hashes + Int expected_n_fastqc_zips call target_wdl.SmartSeq2SingleCell as target_workflow { input: @@ -50,7 +51,9 @@ workflow TestSmartSeq2SingleCellPR { target_metrics = target_workflow.insert_size_metrics, expected_metrics_hash = expected_metrics_hash, fastqc_htmls = target_workflow.fastqc_htmls, - expected_fastqc_html_hashes = expected_fastqc_html_hashes + expected_fastqc_html_hashes = expected_fastqc_html_hashes, + n_fastqc_zips = length(target_workflow.fastqc_zips), + expected_n_fastqc_zips = expected_n_fastqc_zips } } From 2b843082303387adff1f7bbd6b654ac99c277dba Mon Sep 17 00:00:00 2001 From: Christopher Kachulis Date: Thu, 16 May 2019 10:38:27 -0400 Subject: [PATCH 13/15] tests --- test/optimus/pr/test_optimus_PR.wdl | 2 +- test/smartseq2_single_sample/pr/test_inputs.json | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/test/optimus/pr/test_optimus_PR.wdl b/test/optimus/pr/test_optimus_PR.wdl index dc09444f9..4a8873b4e 100644 --- a/test/optimus/pr/test_optimus_PR.wdl +++ b/test/optimus/pr/test_optimus_PR.wdl @@ -43,7 +43,7 @@ workflow TestOptimusPR { gene_metrics = target.gene_metrics, cell_metrics = target.cell_metrics, fastqc_htmls = target.fastqc_htmls, - n_fastq_zips = length(target.fastqc_zips), + n_fastqc_zips = length(target.fastqc_zips), expected_bam_hash = expected_bam_hash, expected_matrix_hash = expected_matrix_hash, expected_cell_metric_hash = expected_cell_metric_hash, diff --git a/test/smartseq2_single_sample/pr/test_inputs.json b/test/smartseq2_single_sample/pr/test_inputs.json index d29d475a5..7b17303f6 100644 --- a/test/smartseq2_single_sample/pr/test_inputs.json +++ b/test/smartseq2_single_sample/pr/test_inputs.json @@ -15,5 +15,6 @@ "TestSmartSeq2SingleCellPR.output_name":"SRR1294925", "TestSmartSeq2SingleCellPR.expected_counts_hash": "135a3fbb959583db17713dc8b9d7fe33", "TestSmartSeq2SingleCellPR.expected_metrics_hash": "99bd9903ac8dc77eb1c047ffa8eb42ed", - "TestSmartSeq2SingleCellPR.expected_fastqc_html_hashes": ["21aac025893e0488d6dac0cd206ac2a9","d0d16bee2e05441acd8189d8e29849e7"] + "TestSmartSeq2SingleCellPR.expected_fastqc_html_hashes": ["21aac025893e0488d6dac0cd206ac2a9","d0d16bee2e05441acd8189d8e29849e7"], + "TestSmartSeq2SingleCellPR.n_fastqc_zips": 2 } From e794f71b7ea989a4c980fbb9e69296eef1c91f72 Mon Sep 17 00:00:00 2001 From: Christopher Kachulis Date: Thu, 16 May 2019 11:42:14 -0400 Subject: [PATCH 14/15] tests --- test/smartseq2_single_sample/pr/test_inputs.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/smartseq2_single_sample/pr/test_inputs.json b/test/smartseq2_single_sample/pr/test_inputs.json index 7b17303f6..2dc99faed 100644 --- a/test/smartseq2_single_sample/pr/test_inputs.json +++ b/test/smartseq2_single_sample/pr/test_inputs.json @@ -16,5 +16,5 @@ "TestSmartSeq2SingleCellPR.expected_counts_hash": "135a3fbb959583db17713dc8b9d7fe33", "TestSmartSeq2SingleCellPR.expected_metrics_hash": "99bd9903ac8dc77eb1c047ffa8eb42ed", "TestSmartSeq2SingleCellPR.expected_fastqc_html_hashes": ["21aac025893e0488d6dac0cd206ac2a9","d0d16bee2e05441acd8189d8e29849e7"], - "TestSmartSeq2SingleCellPR.n_fastqc_zips": 2 + "TestSmartSeq2SingleCellPR.expected_n_fastqc_zips": 2 } From c139eef5af64059bc6bbc26a02439c70e9d39d6d Mon Sep 17 00:00:00 2001 From: Christopher Kachulis Date: Thu, 16 May 2019 15:07:47 -0400 Subject: [PATCH 15/15] fixing tests --- test/optimus/pr/ValidateOptimus.wdl | 20 ++++++++++++------- test/optimus/pr/test_inputs.json | 6 +++--- test/optimus/pr/test_optimus_PR.wdl | 4 ++-- .../pr/ValidateSmartSeq2SingleCell.wdl | 20 ++++++++++++------- .../pr/test_inputs.json | 3 ++- .../pr/test_smartseq2_single_sample_PR.wdl | 4 ++-- 6 files changed, 35 insertions(+), 22 deletions(-) diff --git a/test/optimus/pr/ValidateOptimus.wdl b/test/optimus/pr/ValidateOptimus.wdl index 0762a8130..c8e0e7c6b 100644 --- a/test/optimus/pr/ValidateOptimus.wdl +++ b/test/optimus/pr/ValidateOptimus.wdl @@ -12,7 +12,7 @@ task ValidateOptimus { String expected_gene_metric_hash String expected_cell_metric_hash Int expected_n_fastqc_zips - Array[String] expected_fastqc_html_hashes + Array[String] expected_fastqc_html_strings command <<< @@ -54,12 +54,18 @@ task ValidateOptimus { fail=true fi - for htmlfile in ${sep=' ' fastqc_htmls}; do - hash=$(md5sum $htmlfile | awk '{print $1}') - if [[ " ${sep=' ' expected_fastqc_html_hashes} " != *" $hash "* ]]; then - >&2 echo "fastq_html_hash ($hash) did not match expected hash (${sep=' ' expected_fastqc_html_hashes})" - fail=true - fi + #search for expected strings in fastqc html + for string in "${sep='" "' expected_fastqc_html_strings}"; do + fail_html=true + for htmlfile in ${sep=' ' fastqc_htmls}; do + if grep "$string" $htmlfile; then + fail_html=false + fi + done + if [ $fail_html == "true" ]; then + >&2 echo "expected string ($string) not found in fastqc html files" + fail=true + fi done if [ ${expected_n_fastqc_zips} != ${n_fastqc_zips} ]; then diff --git a/test/optimus/pr/test_inputs.json b/test/optimus/pr/test_inputs.json index d70337c03..7b4b85d17 100644 --- a/test/optimus/pr/test_inputs.json +++ b/test/optimus/pr/test_inputs.json @@ -3,9 +3,9 @@ "TestOptimusPR.expected_matrix_hash": "aec7a79dc7b85a5d621509a3f4fa2192", "TestOptimusPR.expected_cell_metric_hash": "45cc8be253445201b02d102d2d096a0c", "TestOptimusPR.expected_gene_metric_hash": "d636671dfcff6ec8068987d0f5780334", - "TestOptimusPR.expected_fastqc_html_hashes": ["cb0bbb7d52198c38f2cfa87621c92e31", - "6697992484d279d15683b5124ab75554", - "26e07b94eaff3a7ea3dcc9a191bd1194"], + "TestOptimusPR.expected_fastqc_html_strings": ["pbmc8k_S1_L007_I1_001_skip250000_read250000.fastq FastQC Report","Sequence length8", + "pbmc8k_S1_L007_R1_001_skip250000_read250000.fastq FastQC Report","Sequence length26", + "pbmc8k_S1_L007_R2_001_skip250000_read250000.fastq FastQC Report","Sequence length98"], "TestOptimusPR.expected_n_fastqc_zips": 6, "TestOptimusPR.r1_fastq": [ "gs://hca-dcp-mint-test-data/10x/demo/fastqs/pbmc8k_S1_L007_R1_001.fastq.gz", diff --git a/test/optimus/pr/test_optimus_PR.wdl b/test/optimus/pr/test_optimus_PR.wdl index 4a8873b4e..794072acc 100644 --- a/test/optimus/pr/test_optimus_PR.wdl +++ b/test/optimus/pr/test_optimus_PR.wdl @@ -10,7 +10,7 @@ workflow TestOptimusPR { String expected_matrix_hash String expected_gene_metric_hash String expected_cell_metric_hash - Array[String] expected_fastqc_html_hashes + Array[String] expected_fastqc_html_strings # a set of strings to search for in the fastqc html outputs Int expected_n_fastqc_zips # Optimus inputs @@ -48,7 +48,7 @@ workflow TestOptimusPR { expected_matrix_hash = expected_matrix_hash, expected_cell_metric_hash = expected_cell_metric_hash, expected_gene_metric_hash = expected_gene_metric_hash, - expected_fastqc_html_hashes = expected_fastqc_html_hashes, + expected_fastqc_html_strings = expected_fastqc_html_strings, expected_n_fastqc_zips = expected_n_fastqc_zips } diff --git a/test/smartseq2_single_sample/pr/ValidateSmartSeq2SingleCell.wdl b/test/smartseq2_single_sample/pr/ValidateSmartSeq2SingleCell.wdl index 4974aa12d..7268176e1 100644 --- a/test/smartseq2_single_sample/pr/ValidateSmartSeq2SingleCell.wdl +++ b/test/smartseq2_single_sample/pr/ValidateSmartSeq2SingleCell.wdl @@ -6,7 +6,7 @@ task ValidateSmartSeq2SingleCell { String expected_metrics_hash Array[File] fastqc_htmls - Array[String] expected_fastqc_html_hashes + Array[String] expected_fastqc_html_strings Int expected_n_fastqc_zips Int n_fastqc_zips @@ -34,12 +34,18 @@ task ValidateSmartSeq2SingleCell { fail=true fi - for htmlfile in ${sep=' ' fastqc_htmls}; do - hash=$(md5sum $htmlfile | awk '{print $1}') - if [[ " ${sep=' ' expected_fastqc_html_hashes} " != *" $hash "* ]]; then - >&2 echo "fastq_html_hash ($hash) did not match expected hash (${sep=' ' expected_fastqc_html_hashes})" - fail=true - fi + #search for expected strings in fastqc html + for string in "${sep='" "' expected_fastqc_html_strings}"; do + fail_html=true + for htmlfile in ${sep=' ' fastqc_htmls}; do + if grep "$string" $htmlfile; then + fail_html=false + fi + done + if [ $fail_html == "true" ]; then + >&2 echo "expected string ($string) not found in fastqc html files" + fail=true + fi done if [ ${expected_n_fastqc_zips} != ${n_fastqc_zips} ]; then diff --git a/test/smartseq2_single_sample/pr/test_inputs.json b/test/smartseq2_single_sample/pr/test_inputs.json index 2dc99faed..176e5bfd6 100644 --- a/test/smartseq2_single_sample/pr/test_inputs.json +++ b/test/smartseq2_single_sample/pr/test_inputs.json @@ -15,6 +15,7 @@ "TestSmartSeq2SingleCellPR.output_name":"SRR1294925", "TestSmartSeq2SingleCellPR.expected_counts_hash": "135a3fbb959583db17713dc8b9d7fe33", "TestSmartSeq2SingleCellPR.expected_metrics_hash": "99bd9903ac8dc77eb1c047ffa8eb42ed", - "TestSmartSeq2SingleCellPR.expected_fastqc_html_hashes": ["21aac025893e0488d6dac0cd206ac2a9","d0d16bee2e05441acd8189d8e29849e7"], + "TestSmartSeq2SingleCellPR.expected_fastqc_html_strings": ["SRR1294925_1_skip250000_read250000.fastq FastQC Report","Sequence length25", + "SRR1294925_2_skip250000_read250000.fastq FastQC Report"], "TestSmartSeq2SingleCellPR.expected_n_fastqc_zips": 2 } diff --git a/test/smartseq2_single_sample/pr/test_smartseq2_single_sample_PR.wdl b/test/smartseq2_single_sample/pr/test_smartseq2_single_sample_PR.wdl index 5fdc9d960..5f7dfd09d 100644 --- a/test/smartseq2_single_sample/pr/test_smartseq2_single_sample_PR.wdl +++ b/test/smartseq2_single_sample/pr/test_smartseq2_single_sample_PR.wdl @@ -23,7 +23,7 @@ workflow TestSmartSeq2SingleCellPR { String output_name File fastq1 File fastq2 - Array[String] expected_fastqc_html_hashes + Array[String] expected_fastqc_html_strings Int expected_n_fastqc_zips call target_wdl.SmartSeq2SingleCell as target_workflow { @@ -51,7 +51,7 @@ workflow TestSmartSeq2SingleCellPR { target_metrics = target_workflow.insert_size_metrics, expected_metrics_hash = expected_metrics_hash, fastqc_htmls = target_workflow.fastqc_htmls, - expected_fastqc_html_hashes = expected_fastqc_html_hashes, + expected_fastqc_html_strings = expected_fastqc_html_strings, n_fastqc_zips = length(target_workflow.fastqc_zips), expected_n_fastqc_zips = expected_n_fastqc_zips }