From fa50706b7da37e3a2685a49ea8969ad12959e018 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Fri, 20 Mar 2026 23:12:13 +0800 Subject: [PATCH 01/10] feat: add gcta/makebksparse module --- .../nf-core/gcta/makebksparse/environment.yml | 7 ++ modules/nf-core/gcta/makebksparse/main.nf | 39 ++++++++ modules/nf-core/gcta/makebksparse/meta.yml | 84 ++++++++++++++++ .../gcta/makebksparse/tests/main.nf.test | 96 +++++++++++++++++++ .../gcta/makebksparse/tests/main.nf.test.snap | 72 ++++++++++++++ .../gcta/makebksparse/tests/nextflow.config | 3 + 6 files changed, 301 insertions(+) create mode 100644 modules/nf-core/gcta/makebksparse/environment.yml create mode 100644 modules/nf-core/gcta/makebksparse/main.nf create mode 100644 modules/nf-core/gcta/makebksparse/meta.yml create mode 100644 modules/nf-core/gcta/makebksparse/tests/main.nf.test create mode 100644 modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap create mode 100644 modules/nf-core/gcta/makebksparse/tests/nextflow.config diff --git a/modules/nf-core/gcta/makebksparse/environment.yml b/modules/nf-core/gcta/makebksparse/environment.yml new file mode 100644 index 000000000000..3e22ea7b9f20 --- /dev/null +++ b/modules/nf-core/gcta/makebksparse/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gcta=1.94.1 diff --git a/modules/nf-core/gcta/makebksparse/main.nf b/modules/nf-core/gcta/makebksparse/main.nf new file mode 100644 index 000000000000..08dde9742be8 --- /dev/null +++ b/modules/nf-core/gcta/makebksparse/main.nf @@ -0,0 +1,39 @@ +process GCTA_MAKEBKSPARSE { + tag "${meta.id}" + label 'process_medium' + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' : + 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }" + + input: + tuple val(meta), path(grm_id), path(grm_bin), path(grm_n_bin) + val(cutoff) + + output: + tuple val(meta), path("*_sp.grm.id"), path("*_sp.grm.sp"), emit: sparse_grm_files + tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + gcta \\ + --grm ${meta.id} \\ + --make-bK-sparse ${cutoff} \\ + --out ${prefix}_sp \\ + --thread-num ${task.cpus} \\ + ${args} + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_sp.grm.id + touch ${prefix}_sp.grm.sp + """ +} diff --git a/modules/nf-core/gcta/makebksparse/meta.yml b/modules/nf-core/gcta/makebksparse/meta.yml new file mode 100644 index 000000000000..a7663d3e0daf --- /dev/null +++ b/modules/nf-core/gcta/makebksparse/meta.yml @@ -0,0 +1,84 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "gcta_makebksparse" +description: Create a sparse GRM from a dense GRM for downstream fastGWA analyses +keywords: + - gcta + - grm + - sparse + - genetics +tools: + - "gcta": + description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data." + homepage: "https://yanglab.westlake.edu.cn/software/gcta/" + documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf" + tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/" + +input: + - - meta: + type: map + description: | + Groovy map containing dense GRM metadata + e.g. 
`[ id:'plink_simulated' ]` + - grm_id: + type: file + description: Dense GRM sample identifier file + pattern: "*.grm.id" + ontologies: [] + - grm_bin: + type: file + description: Dense GRM binary matrix file + pattern: "*.grm.bin" + ontologies: [] + - grm_n_bin: + type: file + description: Dense GRM sample-count matrix file + pattern: "*.grm.N.bin" + ontologies: [] + - cutoff: + type: float + description: Sparse GRM cutoff passed to `--make-bK-sparse` + +output: + sparse_grm_files: + - - meta: + type: map + description: | + Groovy map containing dense GRM metadata + e.g. `[ id:'plink_simulated' ]` + - "*_sp.grm.id": + type: file + description: Sparse GRM sample identifier file + pattern: "*_sp.grm.id" + ontologies: [] + - "*_sp.grm.sp": + type: file + description: Sparse GRM matrix file + pattern: "*_sp.grm.sp" + ontologies: [] + versions_gcta: + - - "${task.process}": + type: string + description: The process the version was collected from + - "gcta": + type: string + description: The tool name + - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'": + type: eval + description: The command used to retrieve the GCTA version + +topics: + versions: + - - ${task.process}: + type: string + description: The process the version was collected from + - gcta: + type: string + description: The tool name + - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//': + type: eval + description: The command used to retrieve the GCTA version + +authors: + - "@andongni" +maintainers: + - "@andongni" diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test b/modules/nf-core/gcta/makebksparse/tests/main.nf.test new file mode 100644 index 000000000000..37a3a78367ff --- /dev/null +++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test @@ -0,0 +1,96 @@ +nextflow_process { + + name "Test Process GCTA_MAKEBKSPARSE" + script "../main.nf" + process "GCTA_MAKEBKSPARSE" + + tag "modules" + tag "modules_nfcore" + tag "gcta" 
+ tag "gcta/makebksparse" + tag "gcta/makegrmpart" + + setup { + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_DENSE") { + script "../../makegrmpart/main.nf" + process { + """ + file('plink_simulated.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'plink_simulated_dense', part_gcta_job:1, nparts_gcta:1 ], + file('plink_simulated.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + input[1] = [[ id:'all_variants' ], []] + """ + } + } + } + + test("homo_sapiens popgen - create sparse GRM") { + config "./nextflow.config" + + when { + process { + """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = dense_grm + input[1] = Channel.value(0.05) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.sparse_grm_files.size() == 1 }, + { assert process.out.sparse_grm_files.get(0).get(0).id == "plink_simulated_dense.part_1_1" }, + { + assert snapshot( + process.out.sparse_grm_files, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - create sparse GRM - stub") { + options "-stub" + config "./nextflow.config" + + when { + process { + """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = dense_grm + input[1] = Channel.value(0.05) + """ + } + } + + then { + 
assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap new file mode 100644 index 000000000000..42828835a289 --- /dev/null +++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "homo_sapiens popgen - create sparse GRM": { + "content": [ + [ + [ + { + "id": "plink_simulated_dense.part_1_1" + }, + "plink_simulated_dense.part_1_1_sp.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9", + "plink_simulated_dense.part_1_1_sp.grm.sp:md5,1b78fe4b14c8690943d7687dd22ba85a" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_MAKEBKSPARSE", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T15:36:15.829559344" + }, + "homo_sapiens popgen - create sparse GRM - stub": { + "content": [ + { + "0": [ + [ + { + "id": "plink_simulated_dense.part_1_1" + }, + "plink_simulated_dense.part_1_1_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_dense.part_1_1_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "GCTA_MAKEBKSPARSE", + "gcta", + "1.94.1" + ] + ], + "sparse_grm_files": [ + [ + { + "id": "plink_simulated_dense.part_1_1" + }, + "plink_simulated_dense.part_1_1_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_dense.part_1_1_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_gcta": [ + [ + "GCTA_MAKEBKSPARSE", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T15:36:22.049066879" + } +} \ No newline at end of file diff --git a/modules/nf-core/gcta/makebksparse/tests/nextflow.config b/modules/nf-core/gcta/makebksparse/tests/nextflow.config new file mode 100644 index 000000000000..de31e0218829 --- /dev/null +++ b/modules/nf-core/gcta/makebksparse/tests/nextflow.config @@ 
-0,0 +1,3 @@ +params { + modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/" +} From 3006c5e23c754691e83ef9aef7a65a619683872e Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Sat, 21 Mar 2026 15:53:17 +0800 Subject: [PATCH 02/10] test(gcta/makebksparse): harden basename contract coverage --- modules/nf-core/gcta/makebksparse/meta.yml | 3 + .../gcta/makebksparse/tests/main.nf.test | 103 +++++++++++------- .../gcta/makebksparse/tests/main.nf.test.snap | 68 ++++++------ 3 files changed, 100 insertions(+), 74 deletions(-) diff --git a/modules/nf-core/gcta/makebksparse/meta.yml b/modules/nf-core/gcta/makebksparse/meta.yml index a7663d3e0daf..1a2a2d9f600f 100644 --- a/modules/nf-core/gcta/makebksparse/meta.yml +++ b/modules/nf-core/gcta/makebksparse/meta.yml @@ -19,6 +19,8 @@ input: description: | Groovy map containing dense GRM metadata e.g. `[ id:'plink_simulated' ]` + `meta.id` is required and is the dense GRM basename contract used by `gcta --grm`. + Input files must therefore be staged as `${meta.id}.grm.id`, `${meta.id}.grm.bin`, and `${meta.id}.grm.N.bin`. - grm_id: type: file description: Dense GRM sample identifier file @@ -45,6 +47,7 @@ output: description: | Groovy map containing dense GRM metadata e.g. `[ id:'plink_simulated' ]` + `meta.id` is preserved from the input dense GRM basename contract. 
- "*_sp.grm.id": type: file description: Sparse GRM sample identifier file diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test b/modules/nf-core/gcta/makebksparse/tests/main.nf.test index 37a3a78367ff..ceacc1e5d8a1 100644 --- a/modules/nf-core/gcta/makebksparse/tests/main.nf.test +++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test @@ -8,46 +8,67 @@ nextflow_process { tag "modules_nfcore" tag "gcta" tag "gcta/makebksparse" - tag "gcta/makegrmpart" - setup { - run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_DENSE") { - script "../../makegrmpart/main.nf" + test("homo_sapiens popgen - create sparse GRM") { + config "./nextflow.config" + + when { process { """ - file('plink_simulated.mbfile').text = 'plink_simulated\\n' - - input[0] = [ - [ id:'plink_simulated_dense', part_gcta_job:1, nparts_gcta:1 ], - file('plink_simulated.mbfile'), - [ - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) - ], - [ - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) - ], - [ - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) - ] - ] - input[1] = [[ id:'all_variants' ], []] + file('contract_dense.grm.id').text = "fid1 iid1\\n" + + grmBuffer = java.nio.ByteBuffer.allocate(4).order(java.nio.ByteOrder.LITTLE_ENDIAN) + grmBuffer.putFloat(1.0f) + file('contract_dense.grm.bin').bytes = grmBuffer.array() + + grmNBuffer = java.nio.ByteBuffer.allocate(4).order(java.nio.ByteOrder.LITTLE_ENDIAN) + grmNBuffer.putFloat(100.0f) + file('contract_dense.grm.N.bin').bytes = grmNBuffer.array() + + input[0] = Channel.value([ + [ id:'contract_dense' ], + file('contract_dense.grm.id'), + file('contract_dense.grm.bin'), + file('contract_dense.grm.N.bin') + ]) + input[1] = Channel.value(0.05) """ } } + + then { + assertAll( + { assert process.success }, + { assert process.out.sparse_grm_files.size() 
== 1 }, + { assert process.out.sparse_grm_files.get(0).get(0).id == "contract_dense" }, + { assert snapshot(process.out.sparse_grm_files).match("sparse_grm_files") }, + { assert snapshot(process.out.findAll { key, val -> key.startsWith('versions') }).match("versions") } + ) + } } - test("homo_sapiens popgen - create sparse GRM") { + test("homo_sapiens popgen - fail when meta id does not match dense GRM basename") { config "./nextflow.config" when { process { """ - dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> - def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job - [[ id:prefix ], grm_id, grm_bin, grm_n_bin] - } + file('contract_dense.grm.id').text = "fid1 iid1\\n" - input[0] = dense_grm + grmBuffer = java.nio.ByteBuffer.allocate(4).order(java.nio.ByteOrder.LITTLE_ENDIAN) + grmBuffer.putFloat(1.0f) + file('contract_dense.grm.bin').bytes = grmBuffer.array() + + grmNBuffer = java.nio.ByteBuffer.allocate(4).order(java.nio.ByteOrder.LITTLE_ENDIAN) + grmNBuffer.putFloat(100.0f) + file('contract_dense.grm.N.bin').bytes = grmNBuffer.array() + + input[0] = Channel.value([ + [ id:'contract_dense_mismatch' ], + file('contract_dense.grm.id'), + file('contract_dense.grm.bin'), + file('contract_dense.grm.N.bin') + ]) input[1] = Channel.value(0.05) """ } @@ -55,15 +76,8 @@ nextflow_process { then { assertAll( - { assert process.success }, - { assert process.out.sparse_grm_files.size() == 1 }, - { assert process.out.sparse_grm_files.get(0).get(0).id == "plink_simulated_dense.part_1_1" }, - { - assert snapshot( - process.out.sparse_grm_files, - process.out.findAll { key, val -> key.startsWith('versions') } - ).match() - } + { assert !process.success }, + { assert process.exitStatus != 0 } ) } } @@ -75,12 +89,16 @@ nextflow_process { when { process { """ - dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> - def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + 
meta.part_gcta_job - [[ id:prefix ], grm_id, grm_bin, grm_n_bin] - } + file('stub_dense.grm.id').text = "fid1 iid1\\n" + file('stub_dense.grm.bin').bytes = [] + file('stub_dense.grm.N.bin').bytes = [] - input[0] = dense_grm + input[0] = Channel.value([ + [ id:'stub_dense' ], + file('stub_dense.grm.id'), + file('stub_dense.grm.bin'), + file('stub_dense.grm.N.bin') + ]) input[1] = Channel.value(0.05) """ } @@ -89,7 +107,10 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert process.out.sparse_grm_files.size() == 1 }, + { assert process.out.sparse_grm_files.get(0).get(0).id == "stub_dense" }, + { assert snapshot(process.out.sparse_grm_files).match("stub_sparse_grm_files") }, + { assert snapshot(process.out.findAll { key, val -> key.startsWith('versions') }).match("stub_versions") } ) } } diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap index 42828835a289..593c3fd0549f 100644 --- a/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap @@ -1,15 +1,24 @@ { - "homo_sapiens popgen - create sparse GRM": { + "stub_sparse_grm_files": { "content": [ [ [ { - "id": "plink_simulated_dense.part_1_1" + "id": "stub_dense" }, - "plink_simulated_dense.part_1_1_sp.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9", - "plink_simulated_dense.part_1_1_sp.grm.sp:md5,1b78fe4b14c8690943d7687dd22ba85a" + "stub_dense_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "stub_dense_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e" ] - ], + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-21T00:30:48.770909526" + }, + "versions": { + "content": [ { "versions_gcta": [ [ @@ -24,36 +33,29 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-13T15:36:15.829559344" + "timestamp": "2026-03-21T00:30:38.045354436" + }, + 
"sparse_grm_files": { + "content": [ + [ + [ + { + "id": "contract_dense" + }, + "contract_dense_sp.grm.id:md5,c1fd3a827b108cf8b749e4fced7b7a52", + "contract_dense_sp.grm.sp:md5,9c1256f576632d254861b63586e7a3da" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-21T00:30:38.025448523" }, - "homo_sapiens popgen - create sparse GRM - stub": { + "stub_versions": { "content": [ { - "0": [ - [ - { - "id": "plink_simulated_dense.part_1_1" - }, - "plink_simulated_dense.part_1_1_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", - "plink_simulated_dense.part_1_1_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - "GCTA_MAKEBKSPARSE", - "gcta", - "1.94.1" - ] - ], - "sparse_grm_files": [ - [ - { - "id": "plink_simulated_dense.part_1_1" - }, - "plink_simulated_dense.part_1_1_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", - "plink_simulated_dense.part_1_1_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], "versions_gcta": [ [ "GCTA_MAKEBKSPARSE", @@ -67,6 +69,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-13T15:36:22.049066879" + "timestamp": "2026-03-21T00:30:48.775770627" } } \ No newline at end of file From c8dc2ea62a8427a59cd49be8bb3da45f1c086194 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Sat, 21 Mar 2026 18:47:04 +0800 Subject: [PATCH 03/10] test(gcta/makebksparse): generate GRM inputs in setup --- .../tests/helpers/dense_grm/main.nf | 33 ++++++++ .../gcta/makebksparse/tests/main.nf.test | 78 +++++++++---------- .../gcta/makebksparse/tests/main.nf.test.snap | 6 +- 3 files changed, 72 insertions(+), 45 deletions(-) create mode 100644 modules/nf-core/gcta/makebksparse/tests/helpers/dense_grm/main.nf diff --git a/modules/nf-core/gcta/makebksparse/tests/helpers/dense_grm/main.nf b/modules/nf-core/gcta/makebksparse/tests/helpers/dense_grm/main.nf new file mode 100644 index 000000000000..deefd4ad119e --- /dev/null +++ 
b/modules/nf-core/gcta/makebksparse/tests/helpers/dense_grm/main.nf @@ -0,0 +1,33 @@ +process GCTA_TEST_DENSE_GRM { + tag "${meta.id}" + label "process_medium" + conda "${projectDir}/modules/nf-core/gcta/makebksparse/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' : + 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}" + + input: + tuple val(meta), path(bed), path(bim), path(fam) + + output: + tuple val(meta), path("${meta.id}.grm.id"), path("${meta.id}.grm.bin"), path("${meta.id}.grm.N.bin"), emit: dense_grm + + script: + def bfile_prefix = bed.baseName + """ + set -euo pipefail + + gcta \\ + --bfile "${bfile_prefix}" \\ + --make-grm \\ + --out "${meta.id}" \\ + --thread-num ${task.cpus} + """ + + stub: + """ + touch "${meta.id}.grm.id" + touch "${meta.id}.grm.bin" + touch "${meta.id}.grm.N.bin" + """ +} diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test b/modules/nf-core/gcta/makebksparse/tests/main.nf.test index ceacc1e5d8a1..cc8c4c63731c 100644 --- a/modules/nf-core/gcta/makebksparse/tests/main.nf.test +++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test @@ -8,6 +8,37 @@ nextflow_process { tag "modules_nfcore" tag "gcta" tag "gcta/makebksparse" + tag "tests/helpers/dense_grm" + + setup { + run("GCTA_TEST_DENSE_GRM", alias: "GCTA_TEST_DENSE_GRM_CONTRACT") { + script "../tests/helpers/dense_grm/main.nf" + process { + """ + input[0] = [ + [ id:'contract_dense' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + """ + } + } + + run("GCTA_TEST_DENSE_GRM", alias: 
"GCTA_TEST_DENSE_GRM_STUB") { + script "../tests/helpers/dense_grm/main.nf" + process { + """ + input[0] = [ + [ id:'stub_dense' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + """ + } + } + } test("homo_sapiens popgen - create sparse GRM") { config "./nextflow.config" @@ -15,22 +46,7 @@ nextflow_process { when { process { """ - file('contract_dense.grm.id').text = "fid1 iid1\\n" - - grmBuffer = java.nio.ByteBuffer.allocate(4).order(java.nio.ByteOrder.LITTLE_ENDIAN) - grmBuffer.putFloat(1.0f) - file('contract_dense.grm.bin').bytes = grmBuffer.array() - - grmNBuffer = java.nio.ByteBuffer.allocate(4).order(java.nio.ByteOrder.LITTLE_ENDIAN) - grmNBuffer.putFloat(100.0f) - file('contract_dense.grm.N.bin').bytes = grmNBuffer.array() - - input[0] = Channel.value([ - [ id:'contract_dense' ], - file('contract_dense.grm.id'), - file('contract_dense.grm.bin'), - file('contract_dense.grm.N.bin') - ]) + input[0] = GCTA_TEST_DENSE_GRM_CONTRACT.out.dense_grm input[1] = Channel.value(0.05) """ } @@ -53,22 +69,9 @@ nextflow_process { when { process { """ - file('contract_dense.grm.id').text = "fid1 iid1\\n" - - grmBuffer = java.nio.ByteBuffer.allocate(4).order(java.nio.ByteOrder.LITTLE_ENDIAN) - grmBuffer.putFloat(1.0f) - file('contract_dense.grm.bin').bytes = grmBuffer.array() - - grmNBuffer = java.nio.ByteBuffer.allocate(4).order(java.nio.ByteOrder.LITTLE_ENDIAN) - grmNBuffer.putFloat(100.0f) - file('contract_dense.grm.N.bin').bytes = grmNBuffer.array() - - input[0] = Channel.value([ - [ id:'contract_dense_mismatch' ], - file('contract_dense.grm.id'), - file('contract_dense.grm.bin'), - file('contract_dense.grm.N.bin') - ]) + input[0] = 
GCTA_TEST_DENSE_GRM_CONTRACT.out.dense_grm.map { meta, grm_id, grm_bin, grm_n_bin -> + [[ id:'contract_dense_mismatch' ], grm_id, grm_bin, grm_n_bin] + } input[1] = Channel.value(0.05) """ } @@ -89,16 +92,7 @@ nextflow_process { when { process { """ - file('stub_dense.grm.id').text = "fid1 iid1\\n" - file('stub_dense.grm.bin').bytes = [] - file('stub_dense.grm.N.bin').bytes = [] - - input[0] = Channel.value([ - [ id:'stub_dense' ], - file('stub_dense.grm.id'), - file('stub_dense.grm.bin'), - file('stub_dense.grm.N.bin') - ]) + input[0] = GCTA_TEST_DENSE_GRM_STUB.out.dense_grm input[1] = Channel.value(0.05) """ } diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap index 593c3fd0549f..0cf695b718eb 100644 --- a/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap @@ -42,8 +42,8 @@ { "id": "contract_dense" }, - "contract_dense_sp.grm.id:md5,c1fd3a827b108cf8b749e4fced7b7a52", - "contract_dense_sp.grm.sp:md5,9c1256f576632d254861b63586e7a3da" + "contract_dense_sp.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9", + "contract_dense_sp.grm.sp:md5,1b78fe4b14c8690943d7687dd22ba85a" ] ] ], @@ -51,7 +51,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-21T00:30:38.025448523" + "timestamp": "2026-03-21T18:40:37.776832502" }, "stub_versions": { "content": [ From a867357dc25963f9841f711f8b09b1ee6b5fcaed Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Sat, 21 Mar 2026 20:35:27 +0800 Subject: [PATCH 04/10] Use real makegrmpart in makebksparse tests --- .../tests/helpers/dense_grm/main.nf | 33 ---------- .../gcta/makebksparse/tests/main.nf.test | 65 ++++++++++++++----- .../gcta/makebksparse/tests/main.nf.test.snap | 16 ++--- 3 files changed, 55 insertions(+), 59 deletions(-) delete mode 100644 modules/nf-core/gcta/makebksparse/tests/helpers/dense_grm/main.nf diff --git 
a/modules/nf-core/gcta/makebksparse/tests/helpers/dense_grm/main.nf b/modules/nf-core/gcta/makebksparse/tests/helpers/dense_grm/main.nf deleted file mode 100644 index deefd4ad119e..000000000000 --- a/modules/nf-core/gcta/makebksparse/tests/helpers/dense_grm/main.nf +++ /dev/null @@ -1,33 +0,0 @@ -process GCTA_TEST_DENSE_GRM { - tag "${meta.id}" - label "process_medium" - conda "${projectDir}/modules/nf-core/gcta/makebksparse/environment.yml" - container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' : - 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}" - - input: - tuple val(meta), path(bed), path(bim), path(fam) - - output: - tuple val(meta), path("${meta.id}.grm.id"), path("${meta.id}.grm.bin"), path("${meta.id}.grm.N.bin"), emit: dense_grm - - script: - def bfile_prefix = bed.baseName - """ - set -euo pipefail - - gcta \\ - --bfile "${bfile_prefix}" \\ - --make-grm \\ - --out "${meta.id}" \\ - --thread-num ${task.cpus} - """ - - stub: - """ - touch "${meta.id}.grm.id" - touch "${meta.id}.grm.bin" - touch "${meta.id}.grm.N.bin" - """ -} diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test b/modules/nf-core/gcta/makebksparse/tests/main.nf.test index cc8c4c63731c..03118bc0ed49 100644 --- a/modules/nf-core/gcta/makebksparse/tests/main.nf.test +++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test @@ -8,33 +8,53 @@ nextflow_process { tag "modules_nfcore" tag "gcta" tag "gcta/makebksparse" - tag "tests/helpers/dense_grm" + tag "gcta/makegrmpart" setup { - run("GCTA_TEST_DENSE_GRM", alias: "GCTA_TEST_DENSE_GRM_CONTRACT") { - script "../tests/helpers/dense_grm/main.nf" + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_CONTRACT") { + script "../../makegrmpart/main.nf" process { """ + file('contract_dense.mbfile').text = 'plink_simulated\\n' + input[0] = [ - [ id:'contract_dense' ], - 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + [ id:'contract_dense', part_gcta_job:1, nparts_gcta:1 ], + file('contract_dense.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] ] + input[1] = [[ id:'all_variants' ], []] """ } } - run("GCTA_TEST_DENSE_GRM", alias: "GCTA_TEST_DENSE_GRM_STUB") { - script "../tests/helpers/dense_grm/main.nf" + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_STUB") { + script "../../makegrmpart/main.nf" process { """ + file('stub_dense.mbfile').text = 'plink_simulated\\n' + input[0] = [ - [ id:'stub_dense' ], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + [ id:'stub_dense', part_gcta_job:1, nparts_gcta:1 ], + file('stub_dense.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] ] + 
input[1] = [[ id:'all_variants' ], []] """ } } @@ -46,7 +66,10 @@ nextflow_process { when { process { """ - input[0] = GCTA_TEST_DENSE_GRM_CONTRACT.out.dense_grm + input[0] = GCTA_MAKEGRMPART_CONTRACT.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } input[1] = Channel.value(0.05) """ } @@ -56,7 +79,7 @@ nextflow_process { assertAll( { assert process.success }, { assert process.out.sparse_grm_files.size() == 1 }, - { assert process.out.sparse_grm_files.get(0).get(0).id == "contract_dense" }, + { assert process.out.sparse_grm_files.get(0).get(0).id == "contract_dense.part_1_1" }, { assert snapshot(process.out.sparse_grm_files).match("sparse_grm_files") }, { assert snapshot(process.out.findAll { key, val -> key.startsWith('versions') }).match("versions") } ) @@ -69,7 +92,10 @@ nextflow_process { when { process { """ - input[0] = GCTA_TEST_DENSE_GRM_CONTRACT.out.dense_grm.map { meta, grm_id, grm_bin, grm_n_bin -> + input[0] = GCTA_MAKEGRMPART_CONTRACT.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + }.map { meta, grm_id, grm_bin, grm_n_bin -> [[ id:'contract_dense_mismatch' ], grm_id, grm_bin, grm_n_bin] } input[1] = Channel.value(0.05) @@ -92,7 +118,10 @@ nextflow_process { when { process { """ - input[0] = GCTA_TEST_DENSE_GRM_STUB.out.dense_grm + input[0] = GCTA_MAKEGRMPART_STUB.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } input[1] = Channel.value(0.05) """ } @@ -102,7 +131,7 @@ nextflow_process { assertAll( { assert process.success }, { assert process.out.sparse_grm_files.size() == 1 }, - { assert process.out.sparse_grm_files.get(0).get(0).id == "stub_dense" }, 
+ { assert process.out.sparse_grm_files.get(0).get(0).id == "stub_dense.part_1_1" }, { assert snapshot(process.out.sparse_grm_files).match("stub_sparse_grm_files") }, { assert snapshot(process.out.findAll { key, val -> key.startsWith('versions') }).match("stub_versions") } ) diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap index 0cf695b718eb..6cbf64c83810 100644 --- a/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap @@ -4,10 +4,10 @@ [ [ { - "id": "stub_dense" + "id": "stub_dense.part_1_1" }, - "stub_dense_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", - "stub_dense_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e" + "stub_dense.part_1_1_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "stub_dense.part_1_1_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e" ] ] ], @@ -15,7 +15,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-21T00:30:48.770909526" + "timestamp": "2026-03-21T20:34:14.13446136" }, "versions": { "content": [ @@ -40,10 +40,10 @@ [ [ { - "id": "contract_dense" + "id": "contract_dense.part_1_1" }, - "contract_dense_sp.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9", - "contract_dense_sp.grm.sp:md5,1b78fe4b14c8690943d7687dd22ba85a" + "contract_dense.part_1_1_sp.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9", + "contract_dense.part_1_1_sp.grm.sp:md5,1b78fe4b14c8690943d7687dd22ba85a" ] ] ], @@ -51,7 +51,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-21T18:40:37.776832502" + "timestamp": "2026-03-21T20:33:53.993675061" }, "stub_versions": { "content": [ From 45958d7c99e97ed4e0d5172ed61135ee72dbc1d1 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Fri, 20 Mar 2026 23:12:13 +0800 Subject: [PATCH 05/10] feat: add gcta/fastgwa module --- modules/nf-core/gcta/fastgwa/environment.yml | 7 + modules/nf-core/gcta/fastgwa/main.nf | 60 ++++ 
modules/nf-core/gcta/fastgwa/meta.yml | 139 ++++++++ .../nf-core/gcta/fastgwa/tests/main.nf.test | 299 ++++++++++++++++++ .../gcta/fastgwa/tests/main.nf.test.snap | 141 +++++++++ .../gcta/fastgwa/tests/nextflow.config | 3 + 6 files changed, 649 insertions(+) create mode 100644 modules/nf-core/gcta/fastgwa/environment.yml create mode 100644 modules/nf-core/gcta/fastgwa/main.nf create mode 100644 modules/nf-core/gcta/fastgwa/meta.yml create mode 100644 modules/nf-core/gcta/fastgwa/tests/main.nf.test create mode 100644 modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap create mode 100644 modules/nf-core/gcta/fastgwa/tests/nextflow.config diff --git a/modules/nf-core/gcta/fastgwa/environment.yml b/modules/nf-core/gcta/fastgwa/environment.yml new file mode 100644 index 000000000000..3e22ea7b9f20 --- /dev/null +++ b/modules/nf-core/gcta/fastgwa/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gcta=1.94.1 diff --git a/modules/nf-core/gcta/fastgwa/main.nf b/modules/nf-core/gcta/fastgwa/main.nf new file mode 100644 index 000000000000..b210462381ab --- /dev/null +++ b/modules/nf-core/gcta/fastgwa/main.nf @@ -0,0 +1,60 @@ +process GCTA_FASTGWA { + tag "${meta.id}:${meta3.id}" + label 'process_medium' + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' : + 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }" + + input: + tuple val(meta), path(bed_pgen), path(bim_pvar), path(fam_psam) + tuple val(meta2), path(sparse_grm_id), path(sparse_grm_sp) + tuple val(meta3), path(phenotype_file) + tuple val(meta4), path(quant_covariates_file) + tuple val(meta5), path(cat_covariates_file) + val mlm_exact + val(mpheno) + + output: + tuple val(meta), val(meta3), path("*.fastGWA"), emit: results + tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def qcovar_arg = quant_covariates_file ? "--qcovar ${quant_covariates_file}" : '' + def covar_arg = cat_covariates_file ? "--covar ${cat_covariates_file}" : '' + def mpheno_value = (mpheno == null || (mpheno instanceof Collection && mpheno.isEmpty())) ? 1 : mpheno + def mpheno_arg = "--mpheno ${mpheno_value}" + def grm_arg = meta3.is_binary ? '' : "--grm-sparse ${meta2.id}" + def genotype_suffix = bed_pgen.name.tokenize('.').last() + def genotype_flag = genotype_suffix == 'pgen' ? '--pfile' : '--bfile' + def genotype_prefix = meta.id + def prefix = task.ext.prefix ?: "${meta.id}" + def out = "${prefix}_${meta3.id}" + def extra_args = task.ext.args ?: '' + def mode_arg = meta3.is_binary ? '--fastGWA-lr' : (mlm_exact ? 
'--fastGWA-mlm-exact' : '--fastGWA-mlm') + + """ + set -euo pipefail + + gcta \\ + ${genotype_flag} ${genotype_prefix} \\ + ${grm_arg} \\ + ${mode_arg} \\ + --pheno ${phenotype_file} \\ + ${qcovar_arg} \\ + ${covar_arg} \\ + ${mpheno_arg} \\ + --thread-num ${task.cpus} \\ + --out ${out} ${extra_args} + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def out = "${prefix}_${meta3.id}" + """ + touch ${out}.fastGWA + """ +} diff --git a/modules/nf-core/gcta/fastgwa/meta.yml b/modules/nf-core/gcta/fastgwa/meta.yml new file mode 100644 index 000000000000..534830ef8d1f --- /dev/null +++ b/modules/nf-core/gcta/fastgwa/meta.yml @@ -0,0 +1,139 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "gcta_fastgwa" +description: Run GCTA fastGWA-MLM with PLINK genotype inputs and a sparse GRM +keywords: + - gcta + - fastgwa + - gwas + - genetics +tools: + - "gcta": + description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data." + homepage: "https://yanglab.westlake.edu.cn/software/gcta/" + documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf" + tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/" +input: + - - meta: + type: map + description: | + Groovy map containing PLINK genotype metadata + e.g. `[ id:'plink_simulated' ]` + - bed_pgen: + type: file + description: PLINK primary genotype file, either `.bed` or `.pgen` + pattern: "*.{bed,pgen}" + ontologies: + - edam: "http://edamontology.org/format_3003" + - bim_pvar: + type: file + description: PLINK sidecar file, either `.bim` or `.pvar` + pattern: "*.{bim,pvar}" + ontologies: [] + - fam_psam: + type: file + description: PLINK sidecar file, either `.fam` or `.psam` + pattern: "*.{fam,psam}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy map containing sparse GRM metadata + e.g. 
`[ id:'plink_simulated_sp' ]` + Used for non-binary fastGWA MLM modes and ignored when `meta3.is_binary` is true + - sparse_grm_id: + type: file + description: Sparse GRM ID file (`.grm.id`), ignored when `meta3.is_binary` is true + pattern: "*.grm.id" + ontologies: [] + - sparse_grm_sp: + type: file + description: Sparse GRM sparse matrix file (`.grm.sp`), ignored when `meta3.is_binary` is true + pattern: "*.grm.sp" + ontologies: [] + - - meta3: + type: map + description: | + Groovy map containing phenotype metadata + e.g. `[ id:'QuantitativeTrait', is_binary:false ]` + `meta3.is_binary` is required and selects logistic vs MLM fastGWA mode + - phenotype_file: + type: file + description: Phenotype file + pattern: "*.{phe,pheno,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" + - - meta4: + type: map + description: | + Groovy map containing quantitative covariate metadata + e.g. `[ id:'covariates_quant' ]` + - quant_covariates_file: + type: file + description: Quantitative covariates file, pass `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" + - - meta5: + type: map + description: | + Groovy map containing categorical covariate metadata + e.g. `[ id:'covariates_cat' ]` + - cat_covariates_file: + type: file + description: Categorical covariates file, pass `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" + - mlm_exact: + type: boolean + description: | + Apply `--fastGWA-mlm-exact` for non-binary phenotypes. + Ignored when `meta3.is_binary` is true because binary phenotypes use `--fastGWA-lr`. + - mpheno: + type: integer + description: | + Phenotype column selector passed to `--mpheno`. + Pass `1` explicitly for the default first phenotype column. +output: + results: + - - meta: + type: map + description: | + Groovy map containing PLINK genotype metadata + e.g. 
`[ id:'plink_simulated' ]` + - meta3: + type: map + description: | + Groovy map containing phenotype metadata + e.g. `[ id:'QuantitativeTrait' ]` + - "*.fastGWA": + type: file + description: FastGWA association results + pattern: "*.fastGWA" + ontologies: + - edam: "http://edamontology.org/format_2330" + versions_gcta: + - - "${task.process}": + type: string + description: The process the version was collected from + - "gcta": + type: string + description: The tool name + - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'": + type: eval + description: The command used to retrieve the GCTA version +topics: + versions: + - - ${task.process}: + type: string + description: The process the version was collected from + - gcta: + type: string + description: The tool name + - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//': + type: eval + description: The command used to retrieve the GCTA version +authors: + - "@andongni" +maintainers: + - "@andongni" diff --git a/modules/nf-core/gcta/fastgwa/tests/main.nf.test b/modules/nf-core/gcta/fastgwa/tests/main.nf.test new file mode 100644 index 000000000000..a925ff4d8fd1 --- /dev/null +++ b/modules/nf-core/gcta/fastgwa/tests/main.nf.test @@ -0,0 +1,299 @@ +nextflow_process { + + name "Test Process GCTA_FASTGWA" + script "../main.nf" + process "GCTA_FASTGWA" + + tag "modules" + tag "modules_nfcore" + tag "gcta" + tag "gcta/fastgwa" + tag "gcta/makegrmpart" + tag "gcta/makebksparse" + tag "gawk" + config "./nextflow.config" + + setup { + run("GAWK", alias: "GAWK_QUANTITATIVE_PHENO") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'QuantitativeTrait' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3 }').collectFile(name:'quantitative_phenotype.awk') + input[2] = false + """ + } + } 
+ + run("GAWK", alias: "GAWK_BINARY_PHENO") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'BinaryTrait' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_binary_phenoname.phe', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3 }').collectFile(name:'binary_phenotype.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_MULTI_PHENO") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'QuantitativeTraits' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3, (\$3 * 1.7) + ((NR % 5) / 10.0) }').collectFile(name:'multi_phenotype.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_QUANTITATIVE_COVARIATES") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'covariates_quant' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$4, \$5, \$6 }').collectFile(name:'quantitative_covariates.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_CATEGORICAL_COVARIATES") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'covariates_cat' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3 }').collectFile(name:'categorical_covariates.awk') + input[2] = false + """ + } + } + + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_DENSE") { + script "../../makegrmpart/main.nf" + process { + """ + file('plink_simulated.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'plink_simulated_dense', part_gcta_job:1, 
nparts_gcta:1 ], + file('plink_simulated.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + input[1] = [[ id:'all_variants' ], []] + """ + } + } + + run("GCTA_MAKEBKSPARSE", alias: "GCTA_MAKEBKSPARSE_DENSE") { + script "../../makebksparse/main.nf" + process { + """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = dense_grm + input[1] = Channel.value(0.05) + """ + } + } + } + + test("homo_sapiens popgen - plink1 with sparse GRM and quantitative phenotype") { + when { + process { + """ + sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_id, sparse_grm_sp -> + [[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp] + } + + quantitative_pheno = GAWK_QUANTITATIVE_PHENO.out.output.map { meta, phenotype_file -> + [[ id:meta.id, is_binary:false ], phenotype_file] + } + + input[0] = [ + [ id:"plink_simulated" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bed", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bim", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.fam", checkIfExists: true) + ] + input[1] = sparse_grm + input[2] = quantitative_pheno + input[3] = GAWK_QUANTITATIVE_COVARIATES.out.output + input[4] = GAWK_CATEGORICAL_COVARIATES.out.output + input[5] = true + input[6] = 1 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
process.out.results.size() == 1 }, + { assert process.out.results.get(0).get(0).id == "plink_simulated" }, + { assert process.out.results.get(0).get(1).id == "QuantitativeTrait" }, + { assert path(process.out.results.get(0).get(2)).fileName.toString() == "plink_simulated_QuantitativeTrait.fastGWA" }, + { + assert snapshot( + process.out.results, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - plink1 with sparse GRM and quantitative phenotype mpheno selection") { + when { + process { + """ + sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_id, sparse_grm_sp -> + [[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp] + } + + multi_pheno = GAWK_MULTI_PHENO.out.output.map { meta, phenotype_file -> + [[ id:'QuantitativeTraitMpheno2', is_binary:false ], phenotype_file] + } + + input[0] = [ + [ id:"plink_simulated" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bed", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bim", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.fam", checkIfExists: true) + ] + input[1] = sparse_grm + input[2] = multi_pheno + input[3] = GAWK_QUANTITATIVE_COVARIATES.out.output + input[4] = GAWK_CATEGORICAL_COVARIATES.out.output + input[5] = true + input[6] = 2 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.results.size() == 1 }, + { assert process.out.results.get(0).get(0).id == "plink_simulated" }, + { assert process.out.results.get(0).get(1).id == "QuantitativeTraitMpheno2" }, + { assert path(process.out.results.get(0).get(2)).fileName.toString() == "plink_simulated_QuantitativeTraitMpheno2.fastGWA" }, + { + assert snapshot( + process.out.results, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) 
+ } + } + + test("homo_sapiens popgen - plink1 with sparse GRM and binary phenotype") { + when { + process { + """ + sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_id, sparse_grm_sp -> + [[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp] + } + + binary_pheno = GAWK_BINARY_PHENO.out.output.map { meta, phenotype_file -> + [[ id:meta.id, is_binary:true ], phenotype_file] + } + + input[0] = [ + [ id:"plink_simulated" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bed", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bim", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.fam", checkIfExists: true) + ] + input[1] = sparse_grm + input[2] = binary_pheno + input[3] = GAWK_QUANTITATIVE_COVARIATES.out.output + input[4] = GAWK_CATEGORICAL_COVARIATES.out.output + input[5] = true + input[6] = 1 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.results.size() == 1 }, + { assert process.out.results.get(0).get(0).id == "plink_simulated" }, + { assert process.out.results.get(0).get(1).id == "BinaryTrait" }, + { assert path(process.out.results.get(0).get(2)).fileName.toString() == "plink_simulated_BinaryTrait.fastGWA" }, + { + assert snapshot( + process.out.results, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - plink2 with sparse GRM - stub") { + options "-stub" + + when { + process { + """ + sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_id, sparse_grm_sp -> + [[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp] + } + + quantitative_pheno = GAWK_QUANTITATIVE_PHENO.out.output.map { meta, phenotype_file -> + [[ id:meta.id, is_binary:false ], phenotype_file] + } + + input[0] = [ + [ id:"plink_simulated" ], + 
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.pgen", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.pvar", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.psam", checkIfExists: true) + ] + input[1] = sparse_grm + input[2] = quantitative_pheno + input[3] = [[ id:'covariates_quant' ], []] + input[4] = [[ id:'covariates_cat' ], []] + input[5] = false + input[6] = 1 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap b/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap new file mode 100644 index 000000000000..83de990fa2a8 --- /dev/null +++ b/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap @@ -0,0 +1,141 @@ +{ + "homo_sapiens popgen - plink2 with sparse GRM - stub": { + "content": [ + { + "0": [ + [ + { + "id": "plink_simulated" + }, + { + "id": "QuantitativeTrait", + "is_binary": false + }, + "plink_simulated_QuantitativeTrait.fastGWA:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "GCTA_FASTGWA", + "gcta", + "1.94.1" + ] + ], + "results": [ + [ + { + "id": "plink_simulated" + }, + { + "id": "QuantitativeTrait", + "is_binary": false + }, + "plink_simulated_QuantitativeTrait.fastGWA:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_gcta": [ + [ + "GCTA_FASTGWA", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T18:15:29.679228048" + }, + "homo_sapiens popgen - plink1 with sparse GRM and quantitative phenotype": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + { + "id": "QuantitativeTrait", + "is_binary": false + }, + "plink_simulated_QuantitativeTrait.fastGWA:md5,d9190e07273a3de2a15a6e7053aed487" + ] + ], + { + "versions_gcta": [ + [ + 
"GCTA_FASTGWA", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-18T19:01:13.458535259" + }, + "homo_sapiens popgen - plink1 with sparse GRM and quantitative phenotype mpheno selection": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + { + "id": "QuantitativeTraitMpheno2", + "is_binary": false + }, + "plink_simulated_QuantitativeTraitMpheno2.fastGWA:md5,d10da1dac8dccf55a9000c4813d4f625" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_FASTGWA", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-20T22:20:52.380195917" + }, + "homo_sapiens popgen - plink1 with sparse GRM and binary phenotype": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + { + "id": "BinaryTrait", + "is_binary": true + }, + "plink_simulated_BinaryTrait.fastGWA:md5,723602dcb94b8a08b3652f1491dcd2ee" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_FASTGWA", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T18:15:22.74128729" + } +} \ No newline at end of file diff --git a/modules/nf-core/gcta/fastgwa/tests/nextflow.config b/modules/nf-core/gcta/fastgwa/tests/nextflow.config new file mode 100644 index 000000000000..de31e0218829 --- /dev/null +++ b/modules/nf-core/gcta/fastgwa/tests/nextflow.config @@ -0,0 +1,3 @@ +params { + modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/" +} From f17a9d3b3d499bf840245977ac433f56078aade7 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Sat, 21 Mar 2026 15:53:18 +0800 Subject: [PATCH 06/10] test(gcta/fastgwa): cover plink2 and lr execution --- modules/nf-core/gcta/fastgwa/meta.yml | 2 +- .../tests/helpers/gcta_prepare_sparse/main.nf | 36 ++++ .../nf-core/gcta/fastgwa/tests/main.nf.test | 166 ++++++++++++------ 
.../gcta/fastgwa/tests/main.nf.test.snap | 75 ++++---- 4 files changed, 192 insertions(+), 87 deletions(-) create mode 100644 modules/nf-core/gcta/fastgwa/tests/helpers/gcta_prepare_sparse/main.nf diff --git a/modules/nf-core/gcta/fastgwa/meta.yml b/modules/nf-core/gcta/fastgwa/meta.yml index 534830ef8d1f..f2494094ec1f 100644 --- a/modules/nf-core/gcta/fastgwa/meta.yml +++ b/modules/nf-core/gcta/fastgwa/meta.yml @@ -1,6 +1,6 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "gcta_fastgwa" -description: Run GCTA fastGWA-MLM with PLINK genotype inputs and a sparse GRM +description: Run GCTA fastGWA association modes (`--fastGWA-mlm`, `--fastGWA-mlm-exact`, and `--fastGWA-lr`) with PLINK genotype inputs keywords: - gcta - fastgwa diff --git a/modules/nf-core/gcta/fastgwa/tests/helpers/gcta_prepare_sparse/main.nf b/modules/nf-core/gcta/fastgwa/tests/helpers/gcta_prepare_sparse/main.nf new file mode 100644 index 000000000000..13e272085d52 --- /dev/null +++ b/modules/nf-core/gcta/fastgwa/tests/helpers/gcta_prepare_sparse/main.nf @@ -0,0 +1,36 @@ +process GCTA_TEST_PREPARE_SPARSE { + tag "${meta.id}" + label "process_medium" + conda "${moduleDir}/../../../environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' : + 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }" + + input: + tuple val(meta), path(bed), path(bim), path(fam) + val(grm_cutoff) + + output: + tuple val(meta), path("${meta.id}.grm.id"), path("${meta.id}.grm.sp"), emit: sparse_grm + + script: + def bfile_prefix = bed.name.replaceFirst(/\.bed$/, "") + def dense_prefix = "${meta.id}_dense" + def sparse_prefix = meta.id + + """ + set -euo pipefail + + gcta \\ + --bfile ${bfile_prefix} \\ + --make-grm \\ + --out ${dense_prefix} \\ + --thread-num ${task.cpus} + + gcta \\ + --grm ${dense_prefix} \\ + --make-bK-sparse ${grm_cutoff} \\ + --out ${sparse_prefix} \\ + --thread-num ${task.cpus} + """ +} diff --git a/modules/nf-core/gcta/fastgwa/tests/main.nf.test b/modules/nf-core/gcta/fastgwa/tests/main.nf.test index a925ff4d8fd1..f8e9f6dc3767 100644 --- a/modules/nf-core/gcta/fastgwa/tests/main.nf.test +++ b/modules/nf-core/gcta/fastgwa/tests/main.nf.test @@ -8,9 +8,8 @@ nextflow_process { tag "modules_nfcore" tag "gcta" tag "gcta/fastgwa" - tag "gcta/makegrmpart" - tag "gcta/makebksparse" tag "gawk" + tag "gcta/testhelpers" config "./nextflow.config" setup { @@ -84,41 +83,17 @@ nextflow_process { } } - run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_DENSE") { - script "../../makegrmpart/main.nf" + run("GCTA_TEST_PREPARE_SPARSE", alias: "GCTA_TEST_PREPARE_SPARSE_DENSE") { + script "./helpers/gcta_prepare_sparse/main.nf" process { """ - file('plink_simulated.mbfile').text = 'plink_simulated\\n' - input[0] = [ - [ id:'plink_simulated_dense', part_gcta_job:1, nparts_gcta:1 ], - file('plink_simulated.mbfile'), - [ - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) - ], - [ - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) - ], - [ - file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) - ] + [ id:'plink_simulated_dense' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) ] - input[1] = [[ id:'all_variants' ], []] - """ - } - } - - run("GCTA_MAKEBKSPARSE", alias: "GCTA_MAKEBKSPARSE_DENSE") { - script "../../makebksparse/main.nf" - process { - """ - dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> - def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job - [[ id:prefix ], grm_id, grm_bin, grm_n_bin] - } - - input[0] = dense_grm - input[1] = Channel.value(0.05) + input[1] = 0.05 """ } } @@ -128,10 +103,6 @@ nextflow_process { when { process { """ - sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_id, sparse_grm_sp -> - [[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp] - } - quantitative_pheno = GAWK_QUANTITATIVE_PHENO.out.output.map { meta, phenotype_file -> [[ id:meta.id, is_binary:false ], phenotype_file] } @@ -142,7 +113,7 @@ nextflow_process { file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bim", checkIfExists: true), file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.fam", checkIfExists: true) ] - input[1] = sparse_grm + input[1] = GCTA_TEST_PREPARE_SPARSE_DENSE.out.sparse_grm input[2] = quantitative_pheno input[3] = GAWK_QUANTITATIVE_COVARIATES.out.output input[4] = GAWK_CATEGORICAL_COVARIATES.out.output @@ -159,6 +130,9 @@ nextflow_process { { assert process.out.results.get(0).get(0).id == "plink_simulated" }, { assert process.out.results.get(0).get(1).id == "QuantitativeTrait" }, { assert 
path(process.out.results.get(0).get(2)).fileName.toString() == "plink_simulated_QuantitativeTrait.fastGWA" }, + { assert path(process.out.results.get(0).get(2)).readLines().get(0).contains("BETA") }, + { assert file(path(process.out.results.get(0).get(2)).parent.toString() + "/.command.sh").text.contains("--fastGWA-mlm-exact") }, + { assert file(path(process.out.results.get(0).get(2)).parent.toString() + "/.command.sh").text.contains("--grm-sparse plink_simulated_dense") }, { assert snapshot( process.out.results, @@ -173,10 +147,6 @@ nextflow_process { when { process { """ - sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_id, sparse_grm_sp -> - [[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp] - } - multi_pheno = GAWK_MULTI_PHENO.out.output.map { meta, phenotype_file -> [[ id:'QuantitativeTraitMpheno2', is_binary:false ], phenotype_file] } @@ -187,7 +157,7 @@ nextflow_process { file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bim", checkIfExists: true), file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.fam", checkIfExists: true) ] - input[1] = sparse_grm + input[1] = GCTA_TEST_PREPARE_SPARSE_DENSE.out.sparse_grm input[2] = multi_pheno input[3] = GAWK_QUANTITATIVE_COVARIATES.out.output input[4] = GAWK_CATEGORICAL_COVARIATES.out.output @@ -204,6 +174,8 @@ nextflow_process { { assert process.out.results.get(0).get(0).id == "plink_simulated" }, { assert process.out.results.get(0).get(1).id == "QuantitativeTraitMpheno2" }, { assert path(process.out.results.get(0).get(2)).fileName.toString() == "plink_simulated_QuantitativeTraitMpheno2.fastGWA" }, + { assert path(process.out.results.get(0).get(2)).readLines().get(0).contains("BETA") }, + { assert file(path(process.out.results.get(0).get(2)).parent.toString() + "/.command.sh").text.contains("--mpheno 2") }, { assert snapshot( process.out.results, @@ -218,21 +190,22 @@ nextflow_process { when { process { """ - 
sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_id, sparse_grm_sp -> - [[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp] - } - binary_pheno = GAWK_BINARY_PHENO.out.output.map { meta, phenotype_file -> [[ id:meta.id, is_binary:true ], phenotype_file] } + unused_sparse_id = file("${workDir}/unused_sparse.grm.id") + unused_sparse_id.text = "S1 S1\\n" + unused_sparse_sp = file("${workDir}/unused_sparse.grm.sp") + unused_sparse_sp.text = "1 1 1\\n" + input[0] = [ [ id:"plink_simulated" ], file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bed", checkIfExists: true), file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bim", checkIfExists: true), file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.fam", checkIfExists: true) ] - input[1] = sparse_grm + input[1] = [[ id:'unused_sparse' ], unused_sparse_id, unused_sparse_sp] input[2] = binary_pheno input[3] = GAWK_QUANTITATIVE_COVARIATES.out.output input[4] = GAWK_CATEGORICAL_COVARIATES.out.output @@ -249,6 +222,8 @@ nextflow_process { { assert process.out.results.get(0).get(0).id == "plink_simulated" }, { assert process.out.results.get(0).get(1).id == "BinaryTrait" }, { assert path(process.out.results.get(0).get(2)).fileName.toString() == "plink_simulated_BinaryTrait.fastGWA" }, + { assert file(path(process.out.results.get(0).get(2)).parent.toString() + "/.command.sh").text.contains("--fastGWA-lr") }, + { assert !file(path(process.out.results.get(0).get(2)).parent.toString() + "/.command.sh").text.contains("--grm-sparse") }, { assert snapshot( process.out.results, @@ -259,16 +234,92 @@ nextflow_process { } } - test("homo_sapiens popgen - plink2 with sparse GRM - stub") { - options "-stub" + test("homo_sapiens popgen - plink2 with sparse GRM and quantitative phenotype") { + + when { + process { + """ + quantitative_pheno = GAWK_QUANTITATIVE_PHENO.out.output.map { meta, phenotype_file 
-> + [[ id:meta.id, is_binary:false ], phenotype_file] + } + + input[0] = [ + [ id:"plink_simulated" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.pgen", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.pvar", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.psam", checkIfExists: true) + ] + input[1] = GCTA_TEST_PREPARE_SPARSE_DENSE.out.sparse_grm + input[2] = quantitative_pheno + input[3] = [[ id:'covariates_quant' ], []] + input[4] = [[ id:'covariates_cat' ], []] + input[5] = false + input[6] = 1 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.results.size() == 1 }, + { assert process.out.results.get(0).get(0).id == "plink_simulated" }, + { assert process.out.results.get(0).get(1).id == "QuantitativeTrait" }, + { assert path(process.out.results.get(0).get(2)).fileName.toString() == "plink_simulated_QuantitativeTrait.fastGWA" }, + { assert path(process.out.results.get(0).get(2)).readLines().get(0).contains("BETA") }, + { + assert snapshot( + process.out.results, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + test("homo_sapiens popgen - non-binary fails when sparse GRM prefix mismatches files") { when { process { """ - sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_id, sparse_grm_sp -> - [[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp] + quantitative_pheno = GAWK_QUANTITATIVE_PHENO.out.output.map { meta, phenotype_file -> + [[ id:meta.id, is_binary:false ], phenotype_file] + } + + sparse_grm_bad_prefix = GCTA_TEST_PREPARE_SPARSE_DENSE.out.sparse_grm.map { meta, sparse_grm_id, sparse_grm_sp -> + [[ id:'incorrect_sparse_prefix' ], sparse_grm_id, sparse_grm_sp] } + input[0] = [ + [ id:"plink_simulated" ], + file(params.modules_testdata_base_path + 
"genomics/homo_sapiens/popgen/plink_simulated.bed", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bim", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.fam", checkIfExists: true) + ] + input[1] = sparse_grm_bad_prefix + input[2] = quantitative_pheno + input[3] = [[ id:'covariates_quant' ], []] + input[4] = [[ id:'covariates_cat' ], []] + input[5] = false + input[6] = 1 + """ + } + } + + then { + assertAll( + { assert !process.success }, + { assert process.exitStatus != 0 }, + { assert process.stdout.toString().contains("incorrect_sparse_prefix") } + ) + } + } + + test("homo_sapiens popgen - plink2 with sparse GRM - stub") { + options "-stub" + + when { + process { + """ quantitative_pheno = GAWK_QUANTITATIVE_PHENO.out.output.map { meta, phenotype_file -> [[ id:meta.id, is_binary:false ], phenotype_file] } @@ -279,7 +330,7 @@ nextflow_process { file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.pvar", checkIfExists: true), file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.psam", checkIfExists: true) ] - input[1] = sparse_grm + input[1] = GCTA_TEST_PREPARE_SPARSE_DENSE.out.sparse_grm input[2] = quantitative_pheno input[3] = [[ id:'covariates_quant' ], []] input[4] = [[ id:'covariates_cat' ], []] @@ -292,7 +343,14 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert process.out.results.size() == 1 }, + { assert process.out.results.get(0).get(0).id == "plink_simulated" }, + { + assert snapshot( + process.out.results, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } ) } } diff --git a/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap b/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap index 83de990fa2a8..e3341af3ed10 100644 --- 
a/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap @@ -1,38 +1,19 @@ { "homo_sapiens popgen - plink2 with sparse GRM - stub": { "content": [ + [ + [ + { + "id": "plink_simulated" + }, + { + "id": "QuantitativeTrait", + "is_binary": false + }, + "plink_simulated_QuantitativeTrait.fastGWA:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], { - "0": [ - [ - { - "id": "plink_simulated" - }, - { - "id": "QuantitativeTrait", - "is_binary": false - }, - "plink_simulated_QuantitativeTrait.fastGWA:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - "GCTA_FASTGWA", - "gcta", - "1.94.1" - ] - ], - "results": [ - [ - { - "id": "plink_simulated" - }, - { - "id": "QuantitativeTrait", - "is_binary": false - }, - "plink_simulated_QuantitativeTrait.fastGWA:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], "versions_gcta": [ [ "GCTA_FASTGWA", @@ -46,7 +27,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-13T18:15:29.679228048" + "timestamp": "2026-03-21T00:43:00.312501457" }, "homo_sapiens popgen - plink1 with sparse GRM and quantitative phenotype": { "content": [ @@ -108,6 +89,36 @@ }, "timestamp": "2026-03-20T22:20:52.380195917" }, + "homo_sapiens popgen - plink2 with sparse GRM and quantitative phenotype": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + { + "id": "QuantitativeTrait", + "is_binary": false + }, + "plink_simulated_QuantitativeTrait.fastGWA:md5,6742a23a7e4280161c104027b1cac012" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_FASTGWA", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-21T00:37:52.883027879" + }, "homo_sapiens popgen - plink1 with sparse GRM and binary phenotype": { "content": [ [ From eace1d2355c659326140e86b3b248fdc8d4dad49 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Sat, 21 Mar 2026 17:48:47 +0800 Subject: [PATCH 07/10] test(gcta/fastgwa): align helper tags with nf-core 
lint --- modules/nf-core/gcta/fastgwa/tests/main.nf.test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/nf-core/gcta/fastgwa/tests/main.nf.test b/modules/nf-core/gcta/fastgwa/tests/main.nf.test index f8e9f6dc3767..4fa2f6961ed6 100644 --- a/modules/nf-core/gcta/fastgwa/tests/main.nf.test +++ b/modules/nf-core/gcta/fastgwa/tests/main.nf.test @@ -9,7 +9,7 @@ nextflow_process { tag "gcta" tag "gcta/fastgwa" tag "gawk" - tag "gcta/testhelpers" + tag "tests/helpers/gcta_prepare_sparse" config "./nextflow.config" setup { @@ -84,7 +84,7 @@ nextflow_process { } run("GCTA_TEST_PREPARE_SPARSE", alias: "GCTA_TEST_PREPARE_SPARSE_DENSE") { - script "./helpers/gcta_prepare_sparse/main.nf" + script "../tests/helpers/gcta_prepare_sparse/main.nf" process { """ input[0] = [ From 31ee46b37f77b04c169596558aa26f5dfba1c588 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Sat, 21 Mar 2026 20:42:13 +0800 Subject: [PATCH 08/10] Use real dependency modules in fastgwa tests --- .../tests/helpers/gcta_prepare_sparse/main.nf | 36 ---------- .../nf-core/gcta/fastgwa/tests/main.nf.test | 71 +++++++++++++++---- 2 files changed, 58 insertions(+), 49 deletions(-) delete mode 100644 modules/nf-core/gcta/fastgwa/tests/helpers/gcta_prepare_sparse/main.nf diff --git a/modules/nf-core/gcta/fastgwa/tests/helpers/gcta_prepare_sparse/main.nf b/modules/nf-core/gcta/fastgwa/tests/helpers/gcta_prepare_sparse/main.nf deleted file mode 100644 index 13e272085d52..000000000000 --- a/modules/nf-core/gcta/fastgwa/tests/helpers/gcta_prepare_sparse/main.nf +++ /dev/null @@ -1,36 +0,0 @@ -process GCTA_TEST_PREPARE_SPARSE { - tag "${meta.id}" - label "process_medium" - conda "${moduleDir}/../../../environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' : - 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }" - - input: - tuple val(meta), path(bed), path(bim), path(fam) - val(grm_cutoff) - - output: - tuple val(meta), path("${meta.id}.grm.id"), path("${meta.id}.grm.sp"), emit: sparse_grm - - script: - def bfile_prefix = bed.name.replaceFirst(/\.bed$/, "") - def dense_prefix = "${meta.id}_dense" - def sparse_prefix = meta.id - - """ - set -euo pipefail - - gcta \\ - --bfile ${bfile_prefix} \\ - --make-grm \\ - --out ${dense_prefix} \\ - --thread-num ${task.cpus} - - gcta \\ - --grm ${dense_prefix} \\ - --make-bK-sparse ${grm_cutoff} \\ - --out ${sparse_prefix} \\ - --thread-num ${task.cpus} - """ -} diff --git a/modules/nf-core/gcta/fastgwa/tests/main.nf.test b/modules/nf-core/gcta/fastgwa/tests/main.nf.test index 4fa2f6961ed6..2dd11e6bd4db 100644 --- a/modules/nf-core/gcta/fastgwa/tests/main.nf.test +++ b/modules/nf-core/gcta/fastgwa/tests/main.nf.test @@ -8,8 +8,9 @@ nextflow_process { tag "modules_nfcore" tag "gcta" tag "gcta/fastgwa" + tag "gcta/makegrmpart" + tag "gcta/makebksparse" tag "gawk" - tag "tests/helpers/gcta_prepare_sparse" config "./nextflow.config" setup { @@ -83,17 +84,41 @@ nextflow_process { } } - run("GCTA_TEST_PREPARE_SPARSE", alias: "GCTA_TEST_PREPARE_SPARSE_DENSE") { - script "../tests/helpers/gcta_prepare_sparse/main.nf" + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_DENSE") { + script "../../makegrmpart/main.nf" process { """ + file('plink_simulated.mbfile').text = 'plink_simulated\\n' + input[0] = [ - [ id:'plink_simulated_dense' ], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + [ 
id:'plink_simulated_dense', part_gcta_job:1, nparts_gcta:1 ], + file('plink_simulated.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] ] - input[1] = 0.05 + input[1] = [[ id:'all_variants' ], []] + """ + } + } + + run("GCTA_MAKEBKSPARSE", alias: "GCTA_MAKEBKSPARSE_DENSE") { + script "../../makebksparse/main.nf" + process { + """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = dense_grm + input[1] = Channel.value(0.05) """ } } @@ -103,6 +128,10 @@ nextflow_process { when { process { """ + sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_id, sparse_grm_sp -> + [[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp] + } + quantitative_pheno = GAWK_QUANTITATIVE_PHENO.out.output.map { meta, phenotype_file -> [[ id:meta.id, is_binary:false ], phenotype_file] } @@ -113,7 +142,7 @@ nextflow_process { file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bim", checkIfExists: true), file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.fam", checkIfExists: true) ] - input[1] = GCTA_TEST_PREPARE_SPARSE_DENSE.out.sparse_grm + input[1] = sparse_grm input[2] = quantitative_pheno input[3] = GAWK_QUANTITATIVE_COVARIATES.out.output input[4] = GAWK_CATEGORICAL_COVARIATES.out.output @@ -147,6 +176,10 @@ nextflow_process { when { process { """ + sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_id, sparse_grm_sp -> + [[ id:meta.id + '_sp' ], 
sparse_grm_id, sparse_grm_sp] + } + multi_pheno = GAWK_MULTI_PHENO.out.output.map { meta, phenotype_file -> [[ id:'QuantitativeTraitMpheno2', is_binary:false ], phenotype_file] } @@ -157,7 +190,7 @@ nextflow_process { file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bim", checkIfExists: true), file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.fam", checkIfExists: true) ] - input[1] = GCTA_TEST_PREPARE_SPARSE_DENSE.out.sparse_grm + input[1] = sparse_grm input[2] = multi_pheno input[3] = GAWK_QUANTITATIVE_COVARIATES.out.output input[4] = GAWK_CATEGORICAL_COVARIATES.out.output @@ -239,6 +272,10 @@ nextflow_process { when { process { """ + sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_id, sparse_grm_sp -> + [[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp] + } + quantitative_pheno = GAWK_QUANTITATIVE_PHENO.out.output.map { meta, phenotype_file -> [[ id:meta.id, is_binary:false ], phenotype_file] } @@ -249,7 +286,7 @@ nextflow_process { file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.pvar", checkIfExists: true), file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.psam", checkIfExists: true) ] - input[1] = GCTA_TEST_PREPARE_SPARSE_DENSE.out.sparse_grm + input[1] = sparse_grm input[2] = quantitative_pheno input[3] = [[ id:'covariates_quant' ], []] input[4] = [[ id:'covariates_cat' ], []] @@ -281,11 +318,15 @@ nextflow_process { when { process { """ + sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_id, sparse_grm_sp -> + [[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp] + } + quantitative_pheno = GAWK_QUANTITATIVE_PHENO.out.output.map { meta, phenotype_file -> [[ id:meta.id, is_binary:false ], phenotype_file] } - sparse_grm_bad_prefix = GCTA_TEST_PREPARE_SPARSE_DENSE.out.sparse_grm.map { meta, sparse_grm_id, sparse_grm_sp -> + sparse_grm_bad_prefix = 
sparse_grm.map { meta, sparse_grm_id, sparse_grm_sp -> [[ id:'incorrect_sparse_prefix' ], sparse_grm_id, sparse_grm_sp] } @@ -320,6 +361,10 @@ nextflow_process { when { process { """ + sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_id, sparse_grm_sp -> + [[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp] + } + quantitative_pheno = GAWK_QUANTITATIVE_PHENO.out.output.map { meta, phenotype_file -> [[ id:meta.id, is_binary:false ], phenotype_file] } @@ -330,7 +375,7 @@ nextflow_process { file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.pvar", checkIfExists: true), file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.psam", checkIfExists: true) ] - input[1] = GCTA_TEST_PREPARE_SPARSE_DENSE.out.sparse_grm + input[1] = sparse_grm input[2] = quantitative_pheno input[3] = [[ id:'covariates_quant' ], []] input[4] = [[ id:'covariates_cat' ], []] From addd032719c38fad7e3f7f165d107de471fd2fc3 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Sat, 16 May 2026 08:36:40 +0800 Subject: [PATCH 09/10] Update fastgwa sparse GRM contract --- modules/nf-core/gcta/fastgwa/main.nf | 36 ++--- modules/nf-core/gcta/fastgwa/meta.yml | 74 ++++++---- .../nf-core/gcta/fastgwa/tests/main.nf.test | 126 ++++++++++-------- .../gcta/fastgwa/tests/main.nf.test.snap | 50 +++---- modules/nf-core/gcta/makebksparse/main.nf | 18 +-- modules/nf-core/gcta/makebksparse/meta.yml | 39 ++---- .../gcta/makebksparse/tests/main.nf.test | 39 +++--- .../gcta/makebksparse/tests/main.nf.test.snap | 20 +-- modules/nf-core/gcta/makegrm/environment.yml | 7 + modules/nf-core/gcta/makegrm/main.nf | 42 ++++++ modules/nf-core/gcta/makegrm/meta.yml | 91 +++++++++++++ 11 files changed, 344 insertions(+), 198 deletions(-) create mode 100644 modules/nf-core/gcta/makegrm/environment.yml create mode 100644 modules/nf-core/gcta/makegrm/main.nf create mode 100644 modules/nf-core/gcta/makegrm/meta.yml diff --git 
a/modules/nf-core/gcta/fastgwa/main.nf b/modules/nf-core/gcta/fastgwa/main.nf index b210462381ab..c0eda47b73e9 100644 --- a/modules/nf-core/gcta/fastgwa/main.nf +++ b/modules/nf-core/gcta/fastgwa/main.nf @@ -1,40 +1,39 @@ process GCTA_FASTGWA { - tag "${meta.id}:${meta3.id}" + tag "${meta.id}:${mpheno}" label 'process_medium' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' : - 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46b0d05f0daa47561d87d2a9cac5e51edc2c78e26f1bbab439c688386241a274/data' + : 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}" input: tuple val(meta), path(bed_pgen), path(bim_pvar), path(fam_psam) - tuple val(meta2), path(sparse_grm_id), path(sparse_grm_sp) - tuple val(meta3), path(phenotype_file) + tuple val(meta2), path(sparse_grm_files) + tuple val(meta3), path(phenotype_file), val(mpheno), val(is_binary) tuple val(meta4), path(quant_covariates_file) tuple val(meta5), path(cat_covariates_file) val mlm_exact - val(mpheno) output: - tuple val(meta), val(meta3), path("*.fastGWA"), emit: results - tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions + tuple val(meta), path("*.fastGWA"), val(mpheno), val(is_binary), emit: results + tuple val(meta), path("*.log"), val(mpheno), val(is_binary), emit: log + tuple val("${task.process}"), val("gcta"), eval("gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'"), emit: versions_gcta, topic: versions when: task.ext.when == null || task.ext.when script: - def qcovar_arg = quant_covariates_file ? 
"--qcovar ${quant_covariates_file}" : '' - def covar_arg = cat_covariates_file ? "--covar ${cat_covariates_file}" : '' - def mpheno_value = (mpheno == null || (mpheno instanceof Collection && mpheno.isEmpty())) ? 1 : mpheno - def mpheno_arg = "--mpheno ${mpheno_value}" - def grm_arg = meta3.is_binary ? '' : "--grm-sparse ${meta2.id}" def genotype_suffix = bed_pgen.name.tokenize('.').last() def genotype_flag = genotype_suffix == 'pgen' ? '--pfile' : '--bfile' - def genotype_prefix = meta.id + def genotype_prefix = bed_pgen.baseName + def grm_arg = sparse_grm_files ? "--grm-sparse ${meta2.id}" : '' + def mode_arg = is_binary ? '--fastGWA-lr' : (mlm_exact ? '--fastGWA-mlm-exact' : '--fastGWA-mlm') + def qcovar_arg = quant_covariates_file ? "--qcovar ${quant_covariates_file}" : '' + def covar_arg = cat_covariates_file ? "--covar ${cat_covariates_file}" : '' + def mpheno_arg = mpheno ? "--mpheno ${mpheno}" : '' def prefix = task.ext.prefix ?: "${meta.id}" - def out = "${prefix}_${meta3.id}" + def out = mpheno ? "${prefix}_${mpheno}" : "${prefix}" def extra_args = task.ext.args ?: '' - def mode_arg = meta3.is_binary ? '--fastGWA-lr' : (mlm_exact ? '--fastGWA-mlm-exact' : '--fastGWA-mlm') """ set -euo pipefail @@ -53,8 +52,9 @@ process GCTA_FASTGWA { stub: def prefix = task.ext.prefix ?: "${meta.id}" - def out = "${prefix}_${meta3.id}" + def out = mpheno ? 
"${prefix}_${mpheno}" : "${prefix}" """ touch ${out}.fastGWA + touch ${out}.log """ } diff --git a/modules/nf-core/gcta/fastgwa/meta.yml b/modules/nf-core/gcta/fastgwa/meta.yml index f2494094ec1f..3d63593878ef 100644 --- a/modules/nf-core/gcta/fastgwa/meta.yml +++ b/modules/nf-core/gcta/fastgwa/meta.yml @@ -3,8 +3,11 @@ name: "gcta_fastgwa" description: Run GCTA fastGWA association modes (`--fastGWA-mlm`, `--fastGWA-mlm-exact`, and `--fastGWA-lr`) with PLINK genotype inputs keywords: - gcta + - genome-wide complex trait analysis - fastgwa + - fast genome-wide association - gwas + - genome-wide association study - genetics tools: - "gcta": @@ -12,6 +15,8 @@ tools: homepage: "https://yanglab.westlake.edu.cn/software/gcta/" documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf" tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/" + licence: ["GPL-3.0-only"] + identifier: "biotools:gcta" input: - - meta: type: map @@ -39,29 +44,32 @@ input: description: | Groovy map containing sparse GRM metadata e.g. `[ id:'plink_simulated_sp' ]` - Used for non-binary fastGWA MLM modes and ignored when `meta3.is_binary` is true - - sparse_grm_id: + Used when sparse GRM files are supplied + - sparse_grm_files: type: file - description: Sparse GRM ID file (`.grm.id`), ignored when `meta3.is_binary` is true - pattern: "*.grm.id" - ontologies: [] - - sparse_grm_sp: - type: file - description: Sparse GRM sparse matrix file (`.grm.sp`), ignored when `meta3.is_binary` is true - pattern: "*.grm.sp" + description: Sparse GRM sidecar files, pass `[]` when absent + pattern: "*.grm.{id,sp}" ontologies: [] - - meta3: type: map description: | Groovy map containing phenotype metadata - e.g. `[ id:'QuantitativeTrait', is_binary:false ]` - `meta3.is_binary` is required and selects logistic vs MLM fastGWA mode + Keep only stable phenotype metadata in this map + e.g. 
`[ id:'plink_simulated' ]` - phenotype_file: type: file description: Phenotype file pattern: "*.{phe,pheno,txt,tsv}" ontologies: - edam: "http://edamontology.org/format_3475" + - mpheno: + type: integer + description: | + Phenotype column selector passed to `--mpheno`. + Pass `[]` when absent. + - is_binary: + type: boolean + description: Whether to run logistic fastGWA mode for a binary trait - - meta4: type: map description: | @@ -88,12 +96,7 @@ input: type: boolean description: | Apply `--fastGWA-mlm-exact` for non-binary phenotypes. - Ignored when `meta3.is_binary` is true because binary phenotypes use `--fastGWA-lr`. - - mpheno: - type: integer - description: | - Phenotype column selector passed to `--mpheno`. - Pass `1` explicitly for the default first phenotype column. + Ignored when `is_binary` is true because binary phenotypes use `--fastGWA-lr`. output: results: - - meta: @@ -101,17 +104,36 @@ output: description: | Groovy map containing PLINK genotype metadata e.g. `[ id:'plink_simulated' ]` - - meta3: - type: map - description: | - Groovy map containing phenotype metadata - e.g. `[ id:'QuantitativeTrait' ]` - "*.fastGWA": type: file description: FastGWA association results pattern: "*.fastGWA" ontologies: - edam: "http://edamontology.org/format_2330" + - mpheno: + type: integer + description: Phenotype column selector used for the emitted result + - is_binary: + type: boolean + description: Whether `--fastGWA-lr` was used for the emitted result + log: + - - meta: + type: map + description: | + Groovy map containing PLINK genotype metadata + e.g. 
`[ id:'plink_simulated' ]` + - "*.log": + type: file + description: GCTA fastGWA log file + pattern: "*.log" + ontologies: + - edam: "http://edamontology.org/format_2330" + - mpheno: + type: integer + description: Phenotype column selector used for the emitted log + - is_binary: + type: boolean + description: Whether `--fastGWA-lr` was used for the emitted log versions_gcta: - - "${task.process}": type: string @@ -119,7 +141,7 @@ output: - "gcta": type: string description: The tool name - - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'": + - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'": type: eval description: The command used to retrieve the GCTA version topics: @@ -130,10 +152,10 @@ topics: - gcta: type: string description: The tool name - - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//': + - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'": type: eval description: The command used to retrieve the GCTA version authors: - - "@andongni" + - "@lyh970817" maintainers: - - "@andongni" + - "@lyh970817" diff --git a/modules/nf-core/gcta/fastgwa/tests/main.nf.test b/modules/nf-core/gcta/fastgwa/tests/main.nf.test index 2dd11e6bd4db..58aa357ac965 100644 --- a/modules/nf-core/gcta/fastgwa/tests/main.nf.test +++ b/modules/nf-core/gcta/fastgwa/tests/main.nf.test @@ -8,7 +8,7 @@ nextflow_process { tag "modules_nfcore" tag "gcta" tag "gcta/fastgwa" - tag "gcta/makegrmpart" + tag "gcta/makegrm" tag "gcta/makebksparse" tag "gawk" config "./nextflow.config" @@ -84,14 +84,14 @@ nextflow_process { } } - run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_DENSE") { - script "../../makegrmpart/main.nf" + run("GCTA_MAKEGRM", alias: "GCTA_MAKEGRM_DENSE") { + script "../../makegrm/main.nf" process { """ file('plink_simulated.mbfile').text = 'plink_simulated\\n' input[0] = [ - [ id:'plink_simulated_dense', part_gcta_job:1, nparts_gcta:1 ], + [ id:'plink_simulated_dense' ], 
file('plink_simulated.mbfile'), [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) @@ -103,7 +103,6 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) ] ] - input[1] = [[ id:'all_variants' ], []] """ } } @@ -112,12 +111,7 @@ nextflow_process { script "../../makebksparse/main.nf" process { """ - dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> - def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job - [[ id:prefix ], grm_id, grm_bin, grm_n_bin] - } - - input[0] = dense_grm + input[0] = GCTA_MAKEGRM_DENSE.out.grm_files input[1] = Channel.value(0.05) """ } @@ -128,12 +122,12 @@ nextflow_process { when { process { """ - sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_id, sparse_grm_sp -> - [[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp] + sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_files -> + [[ id:meta.id + '_sp' ], sparse_grm_files] } quantitative_pheno = GAWK_QUANTITATIVE_PHENO.out.output.map { meta, phenotype_file -> - [[ id:meta.id, is_binary:false ], phenotype_file] + [[ id:meta.id ], phenotype_file, 1, false] } input[0] = [ @@ -147,7 +141,6 @@ nextflow_process { input[3] = GAWK_QUANTITATIVE_COVARIATES.out.output input[4] = GAWK_CATEGORICAL_COVARIATES.out.output input[5] = true - input[6] = 1 """ } } @@ -156,12 +149,18 @@ nextflow_process { assertAll( { assert process.success }, { assert process.out.results.size() == 1 }, + { assert process.out.log.size() == 1 }, { assert process.out.results.get(0).get(0).id == "plink_simulated" }, - { assert process.out.results.get(0).get(1).id == "QuantitativeTrait" }, - { assert path(process.out.results.get(0).get(2)).fileName.toString() == "plink_simulated_QuantitativeTrait.fastGWA" }, - { assert 
path(process.out.results.get(0).get(2)).readLines().get(0).contains("BETA") }, - { assert file(path(process.out.results.get(0).get(2)).parent.toString() + "/.command.sh").text.contains("--fastGWA-mlm-exact") }, - { assert file(path(process.out.results.get(0).get(2)).parent.toString() + "/.command.sh").text.contains("--grm-sparse plink_simulated_dense") }, + { assert process.out.results.get(0).get(2) == 1 }, + { assert process.out.results.get(0).get(3) == false }, + { assert process.out.log.get(0).get(2) == 1 }, + { assert process.out.log.get(0).get(3) == false }, + { assert path(process.out.results.get(0).get(1)).fileName.toString() == "plink_simulated_1.fastGWA" }, + { assert path(process.out.log.get(0).get(1)).fileName.toString() == "plink_simulated_1.log" }, + { assert path(process.out.results.get(0).get(1)).readLines().get(0).contains("BETA") }, + { assert file(path(process.out.results.get(0).get(1)).parent.toString() + "/.command.sh").text.contains("--fastGWA-mlm-exact") }, + { assert file(path(process.out.results.get(0).get(1)).parent.toString() + "/.command.sh").text.contains("--grm-sparse plink_simulated_dense_sp") }, + { assert file(path(process.out.results.get(0).get(1)).parent.toString() + "/.command.sh").text.contains("--mpheno 1") }, { assert snapshot( process.out.results, @@ -176,12 +175,12 @@ nextflow_process { when { process { """ - sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_id, sparse_grm_sp -> - [[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp] + sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_files -> + [[ id:meta.id + '_sp' ], sparse_grm_files] } multi_pheno = GAWK_MULTI_PHENO.out.output.map { meta, phenotype_file -> - [[ id:'QuantitativeTraitMpheno2', is_binary:false ], phenotype_file] + [[ id:'QuantitativeTraitMpheno2' ], phenotype_file, 2, false] } input[0] = [ @@ -195,7 +194,6 @@ nextflow_process { input[3] = GAWK_QUANTITATIVE_COVARIATES.out.output input[4] = 
GAWK_CATEGORICAL_COVARIATES.out.output input[5] = true - input[6] = 2 """ } } @@ -204,11 +202,16 @@ nextflow_process { assertAll( { assert process.success }, { assert process.out.results.size() == 1 }, + { assert process.out.log.size() == 1 }, { assert process.out.results.get(0).get(0).id == "plink_simulated" }, - { assert process.out.results.get(0).get(1).id == "QuantitativeTraitMpheno2" }, - { assert path(process.out.results.get(0).get(2)).fileName.toString() == "plink_simulated_QuantitativeTraitMpheno2.fastGWA" }, - { assert path(process.out.results.get(0).get(2)).readLines().get(0).contains("BETA") }, - { assert file(path(process.out.results.get(0).get(2)).parent.toString() + "/.command.sh").text.contains("--mpheno 2") }, + { assert process.out.results.get(0).get(2) == 2 }, + { assert process.out.results.get(0).get(3) == false }, + { assert process.out.log.get(0).get(2) == 2 }, + { assert process.out.log.get(0).get(3) == false }, + { assert path(process.out.results.get(0).get(1)).fileName.toString() == "plink_simulated_2.fastGWA" }, + { assert path(process.out.log.get(0).get(1)).fileName.toString() == "plink_simulated_2.log" }, + { assert path(process.out.results.get(0).get(1)).readLines().get(0).contains("BETA") }, + { assert file(path(process.out.results.get(0).get(1)).parent.toString() + "/.command.sh").text.contains("--mpheno 2") }, { assert snapshot( process.out.results, @@ -224,26 +227,20 @@ nextflow_process { process { """ binary_pheno = GAWK_BINARY_PHENO.out.output.map { meta, phenotype_file -> - [[ id:meta.id, is_binary:true ], phenotype_file] + [[ id:meta.id ], phenotype_file, 1, true] } - unused_sparse_id = file("${workDir}/unused_sparse.grm.id") - unused_sparse_id.text = "S1 S1\\n" - unused_sparse_sp = file("${workDir}/unused_sparse.grm.sp") - unused_sparse_sp.text = "1 1 1\\n" - input[0] = [ [ id:"plink_simulated" ], file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bed", checkIfExists: true), 
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bim", checkIfExists: true), file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.fam", checkIfExists: true) ] - input[1] = [[ id:'unused_sparse' ], unused_sparse_id, unused_sparse_sp] + input[1] = [[ id:'empty_sparse' ], []] input[2] = binary_pheno input[3] = GAWK_QUANTITATIVE_COVARIATES.out.output input[4] = GAWK_CATEGORICAL_COVARIATES.out.output input[5] = true - input[6] = 1 """ } } @@ -252,11 +249,17 @@ nextflow_process { assertAll( { assert process.success }, { assert process.out.results.size() == 1 }, + { assert process.out.log.size() == 1 }, { assert process.out.results.get(0).get(0).id == "plink_simulated" }, - { assert process.out.results.get(0).get(1).id == "BinaryTrait" }, - { assert path(process.out.results.get(0).get(2)).fileName.toString() == "plink_simulated_BinaryTrait.fastGWA" }, - { assert file(path(process.out.results.get(0).get(2)).parent.toString() + "/.command.sh").text.contains("--fastGWA-lr") }, - { assert !file(path(process.out.results.get(0).get(2)).parent.toString() + "/.command.sh").text.contains("--grm-sparse") }, + { assert process.out.results.get(0).get(2) == 1 }, + { assert process.out.results.get(0).get(3) == true }, + { assert process.out.log.get(0).get(2) == 1 }, + { assert process.out.log.get(0).get(3) == true }, + { assert path(process.out.results.get(0).get(1)).fileName.toString() == "plink_simulated_1.fastGWA" }, + { assert path(process.out.log.get(0).get(1)).fileName.toString() == "plink_simulated_1.log" }, + { assert file(path(process.out.results.get(0).get(1)).parent.toString() + "/.command.sh").text.contains("--fastGWA-lr") }, + { assert file(path(process.out.results.get(0).get(1)).parent.toString() + "/.command.sh").text.contains("--mpheno 1") }, + { assert !file(path(process.out.results.get(0).get(1)).parent.toString() + "/.command.sh").text.contains("--grm-sparse") }, { assert snapshot( 
process.out.results, @@ -272,12 +275,12 @@ nextflow_process { when { process { """ - sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_id, sparse_grm_sp -> - [[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp] + sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_files -> + [[ id:meta.id + '_sp' ], sparse_grm_files] } quantitative_pheno = GAWK_QUANTITATIVE_PHENO.out.output.map { meta, phenotype_file -> - [[ id:meta.id, is_binary:false ], phenotype_file] + [[ id:meta.id ], phenotype_file, 1, false] } input[0] = [ @@ -291,7 +294,6 @@ nextflow_process { input[3] = [[ id:'covariates_quant' ], []] input[4] = [[ id:'covariates_cat' ], []] input[5] = false - input[6] = 1 """ } } @@ -300,10 +302,16 @@ nextflow_process { assertAll( { assert process.success }, { assert process.out.results.size() == 1 }, + { assert process.out.log.size() == 1 }, { assert process.out.results.get(0).get(0).id == "plink_simulated" }, - { assert process.out.results.get(0).get(1).id == "QuantitativeTrait" }, - { assert path(process.out.results.get(0).get(2)).fileName.toString() == "plink_simulated_QuantitativeTrait.fastGWA" }, - { assert path(process.out.results.get(0).get(2)).readLines().get(0).contains("BETA") }, + { assert process.out.results.get(0).get(2) == 1 }, + { assert process.out.results.get(0).get(3) == false }, + { assert process.out.log.get(0).get(2) == 1 }, + { assert process.out.log.get(0).get(3) == false }, + { assert path(process.out.results.get(0).get(1)).fileName.toString() == "plink_simulated_1.fastGWA" }, + { assert path(process.out.log.get(0).get(1)).fileName.toString() == "plink_simulated_1.log" }, + { assert path(process.out.results.get(0).get(1)).readLines().get(0).contains("BETA") }, + { assert file(path(process.out.results.get(0).get(1)).parent.toString() + "/.command.sh").text.contains("--grm-sparse plink_simulated_dense_sp") }, { assert snapshot( process.out.results, @@ -318,16 +326,16 @@ nextflow_process { 
when { process { """ - sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_id, sparse_grm_sp -> - [[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp] + sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_files -> + [[ id:meta.id + '_sp' ], sparse_grm_files] } quantitative_pheno = GAWK_QUANTITATIVE_PHENO.out.output.map { meta, phenotype_file -> - [[ id:meta.id, is_binary:false ], phenotype_file] + [[ id:meta.id ], phenotype_file, 1, false] } - sparse_grm_bad_prefix = sparse_grm.map { meta, sparse_grm_id, sparse_grm_sp -> - [[ id:'incorrect_sparse_prefix' ], sparse_grm_id, sparse_grm_sp] + sparse_grm_bad_prefix = sparse_grm.map { meta, sparse_grm_files -> + [[ id:'incorrect_sparse_prefix' ], sparse_grm_files] } input[0] = [ @@ -341,7 +349,6 @@ nextflow_process { input[3] = [[ id:'covariates_quant' ], []] input[4] = [[ id:'covariates_cat' ], []] input[5] = false - input[6] = 1 """ } } @@ -350,7 +357,8 @@ nextflow_process { assertAll( { assert !process.success }, { assert process.exitStatus != 0 }, - { assert process.stdout.toString().contains("incorrect_sparse_prefix") } + { assert process.stdout.toString().contains("incorrect_sparse_prefix") }, + { assert process.stdout.toString().contains("incorrect_sparse_prefix.grm.id") } ) } } @@ -361,12 +369,12 @@ nextflow_process { when { process { """ - sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_id, sparse_grm_sp -> - [[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp] + sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_files -> + [[ id:meta.id + '_sp' ], sparse_grm_files] } quantitative_pheno = GAWK_QUANTITATIVE_PHENO.out.output.map { meta, phenotype_file -> - [[ id:meta.id, is_binary:false ], phenotype_file] + [[ id:meta.id ], phenotype_file, 1, false] } input[0] = [ @@ -380,7 +388,6 @@ nextflow_process { input[3] = [[ id:'covariates_quant' ], []] input[4] = [[ id:'covariates_cat' ], []] 
input[5] = false - input[6] = 1 """ } } @@ -389,7 +396,10 @@ nextflow_process { assertAll( { assert process.success }, { assert process.out.results.size() == 1 }, + { assert process.out.log.size() == 1 }, { assert process.out.results.get(0).get(0).id == "plink_simulated" }, + { assert process.out.results.get(0).get(2) == 1 }, + { assert process.out.results.get(0).get(3) == false }, { assert snapshot( process.out.results, diff --git a/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap b/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap index e3341af3ed10..9b41d8bfd154 100644 --- a/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap @@ -6,11 +6,9 @@ { "id": "plink_simulated" }, - { - "id": "QuantitativeTrait", - "is_binary": false - }, - "plink_simulated_QuantitativeTrait.fastGWA:md5,d41d8cd98f00b204e9800998ecf8427e" + "plink_simulated_1.fastGWA:md5,d41d8cd98f00b204e9800998ecf8427e", + 1, + false ] ], { @@ -27,7 +25,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-21T00:43:00.312501457" + "timestamp": "2026-05-15T21:51:24.403157293" }, "homo_sapiens popgen - plink1 with sparse GRM and quantitative phenotype": { "content": [ @@ -36,11 +34,9 @@ { "id": "plink_simulated" }, - { - "id": "QuantitativeTrait", - "is_binary": false - }, - "plink_simulated_QuantitativeTrait.fastGWA:md5,d9190e07273a3de2a15a6e7053aed487" + "plink_simulated_1.fastGWA:md5,d9190e07273a3de2a15a6e7053aed487", + 1, + false ] ], { @@ -57,7 +53,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-18T19:01:13.458535259" + "timestamp": "2026-05-15T21:41:12.959660072" }, "homo_sapiens popgen - plink1 with sparse GRM and quantitative phenotype mpheno selection": { "content": [ @@ -66,11 +62,9 @@ { "id": "plink_simulated" }, - { - "id": "QuantitativeTraitMpheno2", - "is_binary": false - }, - "plink_simulated_QuantitativeTraitMpheno2.fastGWA:md5,d10da1dac8dccf55a9000c4813d4f625" + 
"plink_simulated_2.fastGWA:md5,d10da1dac8dccf55a9000c4813d4f625", + 2, + false ] ], { @@ -87,7 +81,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-20T22:20:52.380195917" + "timestamp": "2026-05-15T21:43:43.761213489" }, "homo_sapiens popgen - plink2 with sparse GRM and quantitative phenotype": { "content": [ @@ -96,11 +90,9 @@ { "id": "plink_simulated" }, - { - "id": "QuantitativeTrait", - "is_binary": false - }, - "plink_simulated_QuantitativeTrait.fastGWA:md5,6742a23a7e4280161c104027b1cac012" + "plink_simulated_1.fastGWA:md5,6742a23a7e4280161c104027b1cac012", + 1, + false ] ], { @@ -117,7 +109,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-21T00:37:52.883027879" + "timestamp": "2026-05-15T21:48:22.048462109" }, "homo_sapiens popgen - plink1 with sparse GRM and binary phenotype": { "content": [ @@ -126,11 +118,9 @@ { "id": "plink_simulated" }, - { - "id": "BinaryTrait", - "is_binary": true - }, - "plink_simulated_BinaryTrait.fastGWA:md5,723602dcb94b8a08b3652f1491dcd2ee" + "plink_simulated_1.fastGWA:md5,723602dcb94b8a08b3652f1491dcd2ee", + 1, + true ] ], { @@ -147,6 +137,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-13T18:15:22.74128729" + "timestamp": "2026-05-15T21:46:07.594442521" } } \ No newline at end of file diff --git a/modules/nf-core/gcta/makebksparse/main.nf b/modules/nf-core/gcta/makebksparse/main.nf index 08dde9742be8..cd62a7dbe747 100644 --- a/modules/nf-core/gcta/makebksparse/main.nf +++ b/modules/nf-core/gcta/makebksparse/main.nf @@ -2,24 +2,24 @@ process GCTA_MAKEBKSPARSE { tag "${meta.id}" label 'process_medium' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' : - 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46b0d05f0daa47561d87d2a9cac5e51edc2c78e26f1bbab439c688386241a274/data' + : 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}" input: - tuple val(meta), path(grm_id), path(grm_bin), path(grm_n_bin) - val(cutoff) + tuple val(meta), path(grm_files) + val cutoff output: - tuple val(meta), path("*_sp.grm.id"), path("*_sp.grm.sp"), emit: sparse_grm_files - tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions + tuple val(meta), path("*_sp.grm.*"), emit: sparse_grm_files + tuple val("${task.process}"), val("gcta"), eval("gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'"), emit: versions_gcta, topic: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def extra_args = task.ext.args ?: '' """ gcta \\ @@ -27,7 +27,7 @@ process GCTA_MAKEBKSPARSE { --make-bK-sparse ${cutoff} \\ --out ${prefix}_sp \\ --thread-num ${task.cpus} \\ - ${args} + ${extra_args} """ stub: diff --git a/modules/nf-core/gcta/makebksparse/meta.yml b/modules/nf-core/gcta/makebksparse/meta.yml index 1a2a2d9f600f..e386fc180e50 100644 --- a/modules/nf-core/gcta/makebksparse/meta.yml +++ b/modules/nf-core/gcta/makebksparse/meta.yml @@ -3,7 +3,9 @@ name: "gcta_makebksparse" description: Create a sparse GRM from a dense GRM for downstream fastGWA analyses keywords: - gcta + - genome-wide complex trait analysis - grm + - genetic relationship matrix - sparse - genetics tools: @@ -12,6 +14,8 @@ tools: homepage: "https://yanglab.westlake.edu.cn/software/gcta/" 
documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf" tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/" + licence: ["GPL-3.0-only"] + identifier: "biotools:gcta" input: - - meta: @@ -21,20 +25,10 @@ input: e.g. `[ id:'plink_simulated' ]` `meta.id` is required and is the dense GRM basename contract used by `gcta --grm`. Input files must therefore be staged as `.grm.id`, `.grm.bin`, and `.grm.N.bin`. - - grm_id: + - grm_files: type: file - description: Dense GRM sample identifier file - pattern: "*.grm.id" - ontologies: [] - - grm_bin: - type: file - description: Dense GRM binary matrix file - pattern: "*.grm.bin" - ontologies: [] - - grm_n_bin: - type: file - description: Dense GRM sample-count matrix file - pattern: "*.grm.N.bin" + description: Dense GRM sidecar files + pattern: "*.grm.{id,bin,N.bin}" ontologies: [] - cutoff: type: float @@ -48,15 +42,10 @@ output: Groovy map containing dense GRM metadata e.g. `[ id:'plink_simulated' ]` `meta.id` is preserved from the input dense GRM basename contract. 
- - "*_sp.grm.id": - type: file - description: Sparse GRM sample identifier file - pattern: "*_sp.grm.id" - ontologies: [] - - "*_sp.grm.sp": + - "*_sp.grm.*": type: file - description: Sparse GRM matrix file - pattern: "*_sp.grm.sp" + description: Sparse GRM sidecar files + pattern: "*_sp.grm.{id,sp}" ontologies: [] versions_gcta: - - "${task.process}": @@ -65,7 +54,7 @@ output: - "gcta": type: string description: The tool name - - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'": + - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'": type: eval description: The command used to retrieve the GCTA version @@ -77,11 +66,11 @@ topics: - gcta: type: string description: The tool name - - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//': + - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'": type: eval description: The command used to retrieve the GCTA version authors: - - "@andongni" + - "@lyh970817" maintainers: - - "@andongni" + - "@lyh970817" diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test b/modules/nf-core/gcta/makebksparse/tests/main.nf.test index 03118bc0ed49..99e77de8f04f 100644 --- a/modules/nf-core/gcta/makebksparse/tests/main.nf.test +++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test @@ -8,17 +8,17 @@ nextflow_process { tag "modules_nfcore" tag "gcta" tag "gcta/makebksparse" - tag "gcta/makegrmpart" + tag "gcta/makegrm" setup { - run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_CONTRACT") { - script "../../makegrmpart/main.nf" + run("GCTA_MAKEGRM", alias: "GCTA_MAKEGRM_CONTRACT") { + script "../../makegrm/main.nf" process { """ file('contract_dense.mbfile').text = 'plink_simulated\\n' input[0] = [ - [ id:'contract_dense', part_gcta_job:1, nparts_gcta:1 ], + [ id:'contract_dense' ], file('contract_dense.mbfile'), [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) @@ -30,19 +30,18 
@@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) ] ] - input[1] = [[ id:'all_variants' ], []] """ } } - run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_STUB") { - script "../../makegrmpart/main.nf" + run("GCTA_MAKEGRM", alias: "GCTA_MAKEGRM_STUB") { + script "../../makegrm/main.nf" process { """ file('stub_dense.mbfile').text = 'plink_simulated\\n' input[0] = [ - [ id:'stub_dense', part_gcta_job:1, nparts_gcta:1 ], + [ id:'stub_dense' ], file('stub_dense.mbfile'), [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) @@ -54,7 +53,6 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) ] ] - input[1] = [[ id:'all_variants' ], []] """ } } @@ -66,10 +64,7 @@ nextflow_process { when { process { """ - input[0] = GCTA_MAKEGRMPART_CONTRACT.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> - def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job - [[ id:prefix ], grm_id, grm_bin, grm_n_bin] - } + input[0] = GCTA_MAKEGRM_CONTRACT.out.grm_files input[1] = Channel.value(0.05) """ } @@ -79,7 +74,8 @@ nextflow_process { assertAll( { assert process.success }, { assert process.out.sparse_grm_files.size() == 1 }, - { assert process.out.sparse_grm_files.get(0).get(0).id == "contract_dense.part_1_1" }, + { assert process.out.sparse_grm_files.get(0).get(0).id == "contract_dense" }, + { assert process.out.sparse_grm_files.get(0).size() == 2 }, { assert snapshot(process.out.sparse_grm_files).match("sparse_grm_files") }, { assert snapshot(process.out.findAll { key, val -> key.startsWith('versions') }).match("versions") } ) @@ -92,11 +88,8 @@ nextflow_process { when { process { """ - input[0] = GCTA_MAKEGRMPART_CONTRACT.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> - def prefix = meta.id + '.part_' + meta.nparts_gcta + 
'_' + meta.part_gcta_job - [[ id:prefix ], grm_id, grm_bin, grm_n_bin] - }.map { meta, grm_id, grm_bin, grm_n_bin -> - [[ id:'contract_dense_mismatch' ], grm_id, grm_bin, grm_n_bin] + input[0] = GCTA_MAKEGRM_CONTRACT.out.grm_files.map { meta, grm_files -> + [[ id:'contract_dense_mismatch' ], grm_files] } input[1] = Channel.value(0.05) """ @@ -118,10 +111,7 @@ nextflow_process { when { process { """ - input[0] = GCTA_MAKEGRMPART_STUB.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> - def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job - [[ id:prefix ], grm_id, grm_bin, grm_n_bin] - } + input[0] = GCTA_MAKEGRM_STUB.out.grm_files input[1] = Channel.value(0.05) """ } @@ -131,7 +121,8 @@ nextflow_process { assertAll( { assert process.success }, { assert process.out.sparse_grm_files.size() == 1 }, - { assert process.out.sparse_grm_files.get(0).get(0).id == "stub_dense.part_1_1" }, + { assert process.out.sparse_grm_files.get(0).get(0).id == "stub_dense" }, + { assert process.out.sparse_grm_files.get(0).size() == 2 }, { assert snapshot(process.out.sparse_grm_files).match("stub_sparse_grm_files") }, { assert snapshot(process.out.findAll { key, val -> key.startsWith('versions') }).match("stub_versions") } ) diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap index 6cbf64c83810..cd4f74b2378a 100644 --- a/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap @@ -4,10 +4,12 @@ [ [ { - "id": "stub_dense.part_1_1" + "id": "stub_dense" }, - "stub_dense.part_1_1_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", - "stub_dense.part_1_1_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e" + [ + "stub_dense_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "stub_dense_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] ] ] ], @@ -15,7 +17,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": 
"2026-03-21T20:34:14.13446136" + "timestamp": "2026-05-15T21:14:25.094890591" }, "versions": { "content": [ @@ -40,10 +42,12 @@ [ [ { - "id": "contract_dense.part_1_1" + "id": "contract_dense" }, - "contract_dense.part_1_1_sp.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9", - "contract_dense.part_1_1_sp.grm.sp:md5,1b78fe4b14c8690943d7687dd22ba85a" + [ + "contract_dense_sp.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9", + "contract_dense_sp.grm.sp:md5,1b78fe4b14c8690943d7687dd22ba85a" + ] ] ] ], @@ -51,7 +55,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-21T20:33:53.993675061" + "timestamp": "2026-05-15T21:11:38.107540519" }, "stub_versions": { "content": [ diff --git a/modules/nf-core/gcta/makegrm/environment.yml b/modules/nf-core/gcta/makegrm/environment.yml new file mode 100644 index 000000000000..3e22ea7b9f20 --- /dev/null +++ b/modules/nf-core/gcta/makegrm/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gcta=1.94.1 diff --git a/modules/nf-core/gcta/makegrm/main.nf b/modules/nf-core/gcta/makegrm/main.nf new file mode 100644 index 000000000000..42fc57c4445e --- /dev/null +++ b/modules/nf-core/gcta/makegrm/main.nf @@ -0,0 +1,42 @@ +process GCTA_MAKEGRM { + tag "${meta.id}" + label 'process_medium' + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46b0d05f0daa47561d87d2a9cac5e51edc2c78e26f1bbab439c688386241a274/data' + : 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}" + + input: + tuple val(meta), path(mfile), path(bed_pgen), path(bim_pvar), path(fam_psam) + + output: + tuple val(meta), path("*.grm.*"), emit: grm_files + tuple val("${task.process}"), val("gcta"), eval("gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'"), emit: versions_gcta, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def genotype_files = bed_pgen instanceof List ? bed_pgen : [bed_pgen] + def genotype_extension = genotype_files[0].name.tokenize('.').last() + def multi_file_flag = genotype_extension == 'pgen' ? '--mpfile' : '--mbfile' + def prefix = task.ext.prefix ?: "${meta.id}" + def extra_args = task.ext.args ?: '' + + """ + + gcta \\ + ${multi_file_flag} ${mfile} \\ + --make-grm \\ + --thread-num ${task.cpus} \\ + --out ${prefix} ${extra_args} + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.grm.id + touch ${prefix}.grm.bin + touch ${prefix}.grm.N.bin + """ +} diff --git a/modules/nf-core/gcta/makegrm/meta.yml b/modules/nf-core/gcta/makegrm/meta.yml new file mode 100644 index 000000000000..0c813dadada6 --- /dev/null +++ b/modules/nf-core/gcta/makegrm/meta.yml @@ -0,0 +1,91 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "gcta_makegrm" +description: Compute a whole dense GRM with GCTA +keywords: + - gcta + - genome-wide complex trait analysis + - grm + - genetic relationship matrix + - genetics +tools: + - "gcta": + description: "GCTA is a tool for genome-wide complex trait analysis." 
+ homepage: "https://yanglab.westlake.edu.cn/software/gcta/" + documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf" + tool_dev_url: "https://github.com/jianyangqt/gcta" + licence: + - "GPL-3.0-only" + identifier: biotools:gcta + +input: + - - meta: + type: map + description: | + Groovy Map containing GRM sample metadata + e.g. `[ id:'gcta_grm' ]` + - mfile: + type: file + description: GCTA multi-input manifest consumed by `--mbfile` or + `--mpfile` + pattern: "*.{mbfile,mpfile,txt}" + ontologies: + - edam: "http://edamontology.org/format_2330" + - bed_pgen: + type: file + description: Collection of PLINK primary genotype files referenced by the + multi-input manifest + pattern: "*.{bed,pgen}" + ontologies: + - edam: "http://edamontology.org/format_3003" + - bim_pvar: + type: file + description: Collection of PLINK variant metadata files referenced by the + multi-input manifest + pattern: "*.{bim,pvar}" + ontologies: [] + - fam_psam: + type: file + description: Collection of PLINK sample metadata files referenced by the + multi-input manifest + pattern: "*.{fam,psam}" + ontologies: [] + +output: + grm_files: + - - meta: + type: map + description: | + Groovy Map containing GRM sample metadata + e.g. 
`[ id:'gcta_grm' ]` + - "*.grm.*": + type: file + description: Dense GRM sidecar files + pattern: "*.grm.{id,bin,N.bin}" + ontologies: [] + versions_gcta: + - - ${task.process}: + type: string + description: The process the version was collected from + - gcta: + type: string + description: The tool name + - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'": + type: eval + description: The command used to generate the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - gcta: + type: string + description: The tool name + - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'": + type: eval + description: The command used to generate the version of the tool + +authors: + - "@lyh970817" +maintainers: + - "@lyh970817" From 8bf58659147cd5a2c61766bb02010fc5d5ed54a9 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Sat, 16 May 2026 18:35:09 +0800 Subject: [PATCH 10/10] Sync gcta/makegrm setup module --- modules/nf-core/gcta/makegrm/main.nf | 4 +- .../nf-core/gcta/makegrm/tests/main.nf.test | 142 ++++++++++++++++++ .../gcta/makegrm/tests/main.nf.test.snap | 111 ++++++++++++++ .../gcta/makegrm/tests/nextflow.config | 3 + 4 files changed, 258 insertions(+), 2 deletions(-) create mode 100644 modules/nf-core/gcta/makegrm/tests/main.nf.test create mode 100644 modules/nf-core/gcta/makegrm/tests/main.nf.test.snap create mode 100644 modules/nf-core/gcta/makegrm/tests/nextflow.config diff --git a/modules/nf-core/gcta/makegrm/main.nf b/modules/nf-core/gcta/makegrm/main.nf index 42fc57c4445e..0bb78639e361 100644 --- a/modules/nf-core/gcta/makegrm/main.nf +++ b/modules/nf-core/gcta/makegrm/main.nf @@ -17,11 +17,11 @@ process GCTA_MAKEGRM { task.ext.when == null || task.ext.when script: + def extra_args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" def genotype_files = bed_pgen instanceof List ? 
bed_pgen : [bed_pgen] def genotype_extension = genotype_files[0].name.tokenize('.').last() def multi_file_flag = genotype_extension == 'pgen' ? '--mpfile' : '--mbfile' - def prefix = task.ext.prefix ?: "${meta.id}" - def extra_args = task.ext.args ?: '' """ diff --git a/modules/nf-core/gcta/makegrm/tests/main.nf.test b/modules/nf-core/gcta/makegrm/tests/main.nf.test new file mode 100644 index 000000000000..e5c63233678b --- /dev/null +++ b/modules/nf-core/gcta/makegrm/tests/main.nf.test @@ -0,0 +1,142 @@ +nextflow_process { + + name "Test Process GCTA_MAKEGRM" + script "../main.nf" + process "GCTA_MAKEGRM" + + tag "modules" + tag "modules_nfcore" + tag "gcta" + tag "gcta/makegrm" + + test("homo_sapiens popgen - plink2") { + when { + process { + """ + file('gcta_grm.mpfile').text = 'plink_simulated plink_simulated.pgen plink_simulated.psam plink_simulated.pvar\\n' + + input[0] = [ + [ id:'gcta_grm' ], + file('gcta_grm.mpfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pgen', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pvar', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.psam', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.grm_files.size() == 1 }, + { assert process.out.grm_files.get(0).get(0).id == 'gcta_grm' }, + { assert process.out.grm_files.get(0).get(0).keySet() == ['id'] as Set }, + { assert process.out.grm_files.get(0).get(1).size() == 3 }, + { + assert process.out.grm_files.get(0).get(1).collect { file(it).name }.toSet() == [ + 'gcta_grm.grm.id', + 'gcta_grm.grm.bin', + 'gcta_grm.grm.N.bin' + ] as Set + }, + { assert file(path(process.out.grm_files.get(0).get(1)[0]).parent.toString() + '/.command.sh').text.contains('--make-grm') }, + { assert 
file(path(process.out.grm_files.get(0).get(1)[0]).parent.toString() + '/.command.sh').text.contains('--mpfile') }, + { + assert snapshot( + process.out.grm_files, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - plink1") { + when { + process { + """ + file('gcta_grm.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'gcta_grm_bed' ], + file('gcta_grm.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.grm_files.size() == 1 }, + { assert process.out.grm_files.get(0).get(0).id == 'gcta_grm_bed' }, + { assert process.out.grm_files.get(0).get(0).keySet() == ['id'] as Set }, + { assert process.out.grm_files.get(0).get(1).size() == 3 }, + { + assert process.out.grm_files.get(0).get(1).collect { file(it).name }.toSet() == [ + 'gcta_grm_bed.grm.id', + 'gcta_grm_bed.grm.bin', + 'gcta_grm_bed.grm.N.bin' + ] as Set + }, + { assert file(path(process.out.grm_files.get(0).get(1)[0]).parent.toString() + '/.command.sh').text.contains('--make-grm') }, + { assert file(path(process.out.grm_files.get(0).get(1)[0]).parent.toString() + '/.command.sh').text.contains('--mbfile') }, + { + assert snapshot( + process.out.grm_files, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - plink1 - stub") { + options "-stub" + + when { + process { + """ + file('gcta_grm.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'gcta_grm_bed' ], + file('gcta_grm.mbfile'), + [ + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/gcta/makegrm/tests/main.nf.test.snap b/modules/nf-core/gcta/makegrm/tests/main.nf.test.snap new file mode 100644 index 000000000000..f8fbe133d5a2 --- /dev/null +++ b/modules/nf-core/gcta/makegrm/tests/main.nf.test.snap @@ -0,0 +1,111 @@ +{ + "homo_sapiens popgen - plink2": { + "content": [ + [ + [ + { + "id": "gcta_grm" + }, + [ + "gcta_grm.grm.N.bin:md5,acaa43bbbf2253d392537a178ecf09a4", + "gcta_grm.grm.bin:md5,45f8dff14bda17d50009a21050572228", + "gcta_grm.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9" + ] + ] + ], + { + "versions_gcta": [ + [ + "GCTA_MAKEGRM", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-15T21:08:43.209734458" + }, + "homo_sapiens popgen - plink1": { + "content": [ + [ + [ + { + "id": "gcta_grm_bed" + }, + [ + "gcta_grm_bed.grm.N.bin:md5,acaa43bbbf2253d392537a178ecf09a4", + "gcta_grm_bed.grm.bin:md5,45f8dff14bda17d50009a21050572228", + "gcta_grm_bed.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9" + ] + ] + ], + { + "versions_gcta": [ + [ + "GCTA_MAKEGRM", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-15T21:09:34.058651287" + }, + "homo_sapiens popgen - plink1 - stub": { + "content": [ + { + "0": [ + [ + { + "id": "gcta_grm_bed" + }, + [ + "gcta_grm_bed.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "gcta_grm_bed.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"gcta_grm_bed.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + [ + "GCTA_MAKEGRM", + "gcta", + "1.94.1" + ] + ], + "grm_files": [ + [ + { + "id": "gcta_grm_bed" + }, + [ + "gcta_grm_bed.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "gcta_grm_bed.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "gcta_grm_bed.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions_gcta": [ + [ + "GCTA_MAKEGRM", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-15T21:10:21.024687128" + } +} \ No newline at end of file diff --git a/modules/nf-core/gcta/makegrm/tests/nextflow.config b/modules/nf-core/gcta/makegrm/tests/nextflow.config new file mode 100644 index 000000000000..de31e0218829 --- /dev/null +++ b/modules/nf-core/gcta/makegrm/tests/nextflow.config @@ -0,0 +1,3 @@ +params { + modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/" +}