From b6e108375412548b865ef004af58f0c33bb11410 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Fri, 20 Mar 2026 23:12:13 +0800 Subject: [PATCH 1/6] feat: add gcta/makebksparse module --- .../nf-core/gcta/makebksparse/environment.yml | 7 ++ modules/nf-core/gcta/makebksparse/main.nf | 39 ++++++++ modules/nf-core/gcta/makebksparse/meta.yml | 84 ++++++++++++++++ .../gcta/makebksparse/tests/main.nf.test | 96 +++++++++++++++++++ .../gcta/makebksparse/tests/main.nf.test.snap | 72 ++++++++++++++ .../gcta/makebksparse/tests/nextflow.config | 3 + 6 files changed, 301 insertions(+) create mode 100644 modules/nf-core/gcta/makebksparse/environment.yml create mode 100644 modules/nf-core/gcta/makebksparse/main.nf create mode 100644 modules/nf-core/gcta/makebksparse/meta.yml create mode 100644 modules/nf-core/gcta/makebksparse/tests/main.nf.test create mode 100644 modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap create mode 100644 modules/nf-core/gcta/makebksparse/tests/nextflow.config diff --git a/modules/nf-core/gcta/makebksparse/environment.yml b/modules/nf-core/gcta/makebksparse/environment.yml new file mode 100644 index 000000000000..3e22ea7b9f20 --- /dev/null +++ b/modules/nf-core/gcta/makebksparse/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gcta=1.94.1 diff --git a/modules/nf-core/gcta/makebksparse/main.nf b/modules/nf-core/gcta/makebksparse/main.nf new file mode 100644 index 000000000000..08dde9742be8 --- /dev/null +++ b/modules/nf-core/gcta/makebksparse/main.nf @@ -0,0 +1,39 @@ +process GCTA_MAKEBKSPARSE { + tag "${meta.id}" + label 'process_medium' + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' : + 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }" + + input: + tuple val(meta), path(grm_id), path(grm_bin), path(grm_n_bin) + val(cutoff) + + output: + tuple val(meta), path("*_sp.grm.id"), path("*_sp.grm.sp"), emit: sparse_grm_files + tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + gcta \\ + --grm ${meta.id} \\ + --make-bK-sparse ${cutoff} \\ + --out ${prefix}_sp \\ + --thread-num ${task.cpus} \\ + ${args} + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_sp.grm.id + touch ${prefix}_sp.grm.sp + """ +} diff --git a/modules/nf-core/gcta/makebksparse/meta.yml b/modules/nf-core/gcta/makebksparse/meta.yml new file mode 100644 index 000000000000..a7663d3e0daf --- /dev/null +++ b/modules/nf-core/gcta/makebksparse/meta.yml @@ -0,0 +1,84 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "gcta_makebksparse" +description: Create a sparse GRM from a dense GRM for downstream fastGWA analyses +keywords: + - gcta + - grm + - sparse + - genetics +tools: + - "gcta": + description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data." + homepage: "https://yanglab.westlake.edu.cn/software/gcta/" + documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf" + tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/" + +input: + - - meta: + type: map + description: | + Groovy map containing dense GRM metadata + e.g. `[ id:'plink_simulated' ]` + - grm_id: + type: file + description: Dense GRM sample identifier file + pattern: "*.grm.id" + ontologies: [] + - grm_bin: + type: file + description: Dense GRM binary matrix file + pattern: "*.grm.bin" + ontologies: [] + - grm_n_bin: + type: file + description: Dense GRM sample-count matrix file + pattern: "*.grm.N.bin" + ontologies: [] + - cutoff: + type: float + description: Sparse GRM cutoff passed to `--make-bK-sparse` + +output: + sparse_grm_files: + - - meta: + type: map + description: | + Groovy map containing dense GRM metadata + e.g. `[ id:'plink_simulated' ]` + - "*_sp.grm.id": + type: file + description: Sparse GRM sample identifier file + pattern: "*_sp.grm.id" + ontologies: [] + - "*_sp.grm.sp": + type: file + description: Sparse GRM matrix file + pattern: "*_sp.grm.sp" + ontologies: [] + versions_gcta: + - - "${task.process}": + type: string + description: The process the version was collected from + - "gcta": + type: string + description: The tool name + - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'": + type: eval + description: The command used to retrieve the GCTA version + +topics: + versions: + - - ${task.process}: + type: string + description: The process the version was collected from + - gcta: + type: string + description: The tool name + - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//': + type: eval + description: The command used to retrieve the GCTA version + +authors: + - "@andongni" +maintainers: + - "@andongni" diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test b/modules/nf-core/gcta/makebksparse/tests/main.nf.test new file mode 100644 index 000000000000..37a3a78367ff --- /dev/null +++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test @@ -0,0 +1,96 @@ +nextflow_process { + + name "Test Process GCTA_MAKEBKSPARSE" + script "../main.nf" + process "GCTA_MAKEBKSPARSE" + + tag "modules" + tag "modules_nfcore" + tag "gcta" + tag "gcta/makebksparse" + tag "gcta/makegrmpart" + + setup { + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_DENSE") { + script "../../makegrmpart/main.nf" + process { + """ + file('plink_simulated.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'plink_simulated_dense', part_gcta_job:1, nparts_gcta:1 ], + file('plink_simulated.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + input[1] = [[ id:'all_variants' ], []] + """ + } + } + } + + test("homo_sapiens popgen - create sparse GRM") { + config "./nextflow.config" + + when { + process { + """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = dense_grm + input[1] = Channel.value(0.05) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.sparse_grm_files.size() == 1 }, + { assert process.out.sparse_grm_files.get(0).get(0).id == "plink_simulated_dense.part_1_1" }, + { + assert snapshot( + process.out.sparse_grm_files, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - create sparse GRM - stub") { + options "-stub" + config "./nextflow.config" + + when { + process { + """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = dense_grm + input[1] = Channel.value(0.05) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap new file mode 100644 index 000000000000..42828835a289 --- /dev/null +++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "homo_sapiens popgen - create sparse GRM": { + "content": [ + [ + [ + { + "id": "plink_simulated_dense.part_1_1" + }, + "plink_simulated_dense.part_1_1_sp.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9", + "plink_simulated_dense.part_1_1_sp.grm.sp:md5,1b78fe4b14c8690943d7687dd22ba85a" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_MAKEBKSPARSE", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T15:36:15.829559344" + }, + "homo_sapiens popgen - create sparse GRM - stub": { + "content": [ + { + "0": [ + [ + { + "id": "plink_simulated_dense.part_1_1" + }, + "plink_simulated_dense.part_1_1_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_dense.part_1_1_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "GCTA_MAKEBKSPARSE", + "gcta", + "1.94.1" + ] + ], + "sparse_grm_files": [ + [ + { + "id": "plink_simulated_dense.part_1_1" + }, + "plink_simulated_dense.part_1_1_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_dense.part_1_1_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_gcta": [ + [ + "GCTA_MAKEBKSPARSE", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T15:36:22.049066879" + } +} \ No newline at end of file diff --git a/modules/nf-core/gcta/makebksparse/tests/nextflow.config b/modules/nf-core/gcta/makebksparse/tests/nextflow.config new file mode 100644 index 000000000000..de31e0218829 --- /dev/null +++ b/modules/nf-core/gcta/makebksparse/tests/nextflow.config @@ -0,0 +1,3 @@ +params { + modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/" +} From 99d093b9d6ae2c391b77b6cb3ea2ad9a33c892a4 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Sat, 21 Mar 2026 15:53:17 +0800 Subject: [PATCH 2/6] test(gcta/makebksparse): harden basename contract coverage --- modules/nf-core/gcta/makebksparse/meta.yml | 3 + .../gcta/makebksparse/tests/main.nf.test | 103 +++++++++++------- .../gcta/makebksparse/tests/main.nf.test.snap | 68 ++++++------ 3 files changed, 100 insertions(+), 74 deletions(-) diff --git a/modules/nf-core/gcta/makebksparse/meta.yml b/modules/nf-core/gcta/makebksparse/meta.yml index a7663d3e0daf..1a2a2d9f600f 100644 --- a/modules/nf-core/gcta/makebksparse/meta.yml +++ b/modules/nf-core/gcta/makebksparse/meta.yml @@ -19,6 +19,8 @@ input: description: | Groovy map containing dense GRM metadata e.g. `[ id:'plink_simulated' ]` + `meta.id` is required and is the dense GRM basename contract used by `gcta --grm`. + Input files must therefore be staged as `.grm.id`, `.grm.bin`, and `.grm.N.bin`. - grm_id: type: file description: Dense GRM sample identifier file @@ -45,6 +47,7 @@ output: description: | Groovy map containing dense GRM metadata e.g. `[ id:'plink_simulated' ]` + `meta.id` is preserved from the input dense GRM basename contract. - "*_sp.grm.id": type: file description: Sparse GRM sample identifier file diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test b/modules/nf-core/gcta/makebksparse/tests/main.nf.test index 37a3a78367ff..ceacc1e5d8a1 100644 --- a/modules/nf-core/gcta/makebksparse/tests/main.nf.test +++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test @@ -8,46 +8,67 @@ nextflow_process { tag "modules_nfcore" tag "gcta" tag "gcta/makebksparse" - tag "gcta/makegrmpart" - setup { - run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_DENSE") { - script "../../makegrmpart/main.nf" + test("homo_sapiens popgen - create sparse GRM") { + config "./nextflow.config" + + when { process { """ - file('plink_simulated.mbfile').text = 'plink_simulated\\n' - - input[0] = [ - [ id:'plink_simulated_dense', part_gcta_job:1, nparts_gcta:1 ], - file('plink_simulated.mbfile'), - [ - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) - ], - [ - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) - ], - [ - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) - ] - ] - input[1] = [[ id:'all_variants' ], []] + file('contract_dense.grm.id').text = "fid1 iid1\\n" + + grmBuffer = java.nio.ByteBuffer.allocate(4).order(java.nio.ByteOrder.LITTLE_ENDIAN) + grmBuffer.putFloat(1.0f) + file('contract_dense.grm.bin').bytes = grmBuffer.array() + + grmNBuffer = java.nio.ByteBuffer.allocate(4).order(java.nio.ByteOrder.LITTLE_ENDIAN) + grmNBuffer.putFloat(100.0f) + file('contract_dense.grm.N.bin').bytes = grmNBuffer.array() + + input[0] = Channel.value([ + [ id:'contract_dense' ], + file('contract_dense.grm.id'), + file('contract_dense.grm.bin'), + file('contract_dense.grm.N.bin') + ]) + input[1] = Channel.value(0.05) """ } } + + then { + assertAll( + { assert process.success }, + { assert process.out.sparse_grm_files.size() == 1 }, + { assert process.out.sparse_grm_files.get(0).get(0).id == "contract_dense" }, + { assert snapshot(process.out.sparse_grm_files).match("sparse_grm_files") }, + { assert snapshot(process.out.findAll { key, val -> key.startsWith('versions') }).match("versions") } + ) + } } - test("homo_sapiens popgen - create sparse GRM") { + test("homo_sapiens popgen - fail when meta id does not match dense GRM basename") { config "./nextflow.config" when { process { """ - dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> - def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job - [[ id:prefix ], grm_id, grm_bin, grm_n_bin] - } + file('contract_dense.grm.id').text = "fid1 iid1\\n" - input[0] = dense_grm + grmBuffer = java.nio.ByteBuffer.allocate(4).order(java.nio.ByteOrder.LITTLE_ENDIAN) + grmBuffer.putFloat(1.0f) + file('contract_dense.grm.bin').bytes = grmBuffer.array() + + grmNBuffer = java.nio.ByteBuffer.allocate(4).order(java.nio.ByteOrder.LITTLE_ENDIAN) + grmNBuffer.putFloat(100.0f) + file('contract_dense.grm.N.bin').bytes = grmNBuffer.array() + + input[0] = Channel.value([ + [ id:'contract_dense_mismatch' ], + file('contract_dense.grm.id'), + file('contract_dense.grm.bin'), + file('contract_dense.grm.N.bin') + ]) input[1] = Channel.value(0.05) """ } @@ -55,15 +76,8 @@ nextflow_process { then { assertAll( - { assert process.success }, - { assert process.out.sparse_grm_files.size() == 1 }, - { assert process.out.sparse_grm_files.get(0).get(0).id == "plink_simulated_dense.part_1_1" }, - { - assert snapshot( - process.out.sparse_grm_files, - process.out.findAll { key, val -> key.startsWith('versions') } - ).match() - } + { assert !process.success }, + { assert process.exitStatus != 0 } ) } } @@ -75,12 +89,16 @@ nextflow_process { when { process { """ - dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> - def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job - [[ id:prefix ], grm_id, grm_bin, grm_n_bin] - } + file('stub_dense.grm.id').text = "fid1 iid1\\n" + file('stub_dense.grm.bin').bytes = [] + file('stub_dense.grm.N.bin').bytes = [] - input[0] = dense_grm + input[0] = Channel.value([ + [ id:'stub_dense' ], + file('stub_dense.grm.id'), + file('stub_dense.grm.bin'), + file('stub_dense.grm.N.bin') + ]) input[1] = Channel.value(0.05) """ } @@ -89,7 +107,10 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert process.out.sparse_grm_files.size() == 1 }, + { assert process.out.sparse_grm_files.get(0).get(0).id == "stub_dense" }, + { assert snapshot(process.out.sparse_grm_files).match("stub_sparse_grm_files") }, + { assert snapshot(process.out.findAll { key, val -> key.startsWith('versions') }).match("stub_versions") } ) } } diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap index 42828835a289..593c3fd0549f 100644 --- a/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap @@ -1,15 +1,24 @@ { - "homo_sapiens popgen - create sparse GRM": { + "stub_sparse_grm_files": { "content": [ [ [ { - "id": "plink_simulated_dense.part_1_1" + "id": "stub_dense" }, - "plink_simulated_dense.part_1_1_sp.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9", - "plink_simulated_dense.part_1_1_sp.grm.sp:md5,1b78fe4b14c8690943d7687dd22ba85a" + "stub_dense_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "stub_dense_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e" ] - ], + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-21T00:30:48.770909526" + }, + "versions": { + "content": [ { "versions_gcta": [ [ @@ -24,36 +33,29 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-13T15:36:15.829559344" + "timestamp": "2026-03-21T00:30:38.045354436" + }, + "sparse_grm_files": { + "content": [ + [ + [ + { + "id": "contract_dense" + }, + "contract_dense_sp.grm.id:md5,c1fd3a827b108cf8b749e4fced7b7a52", + "contract_dense_sp.grm.sp:md5,9c1256f576632d254861b63586e7a3da" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-21T00:30:38.025448523" }, - "homo_sapiens popgen - create sparse GRM - stub": { + "stub_versions": { "content": [ { - "0": [ - [ - { - "id": "plink_simulated_dense.part_1_1" - }, - "plink_simulated_dense.part_1_1_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", - "plink_simulated_dense.part_1_1_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - "GCTA_MAKEBKSPARSE", - "gcta", - "1.94.1" - ] - ], - "sparse_grm_files": [ - [ - { - "id": "plink_simulated_dense.part_1_1" - }, - "plink_simulated_dense.part_1_1_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", - "plink_simulated_dense.part_1_1_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], "versions_gcta": [ [ "GCTA_MAKEBKSPARSE", @@ -67,6 +69,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-13T15:36:22.049066879" + "timestamp": "2026-03-21T00:30:48.775770627" } } \ No newline at end of file From d827fabbaebdf4a1cf65bd7b09697ac3a3bfc806 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Sat, 21 Mar 2026 18:47:04 +0800 Subject: [PATCH 3/6] test(gcta/makebksparse): generate GRM inputs in setup --- .../tests/helpers/dense_grm/main.nf | 33 ++++++++ .../gcta/makebksparse/tests/main.nf.test | 78 +++++++++---------- .../gcta/makebksparse/tests/main.nf.test.snap | 6 +- 3 files changed, 72 insertions(+), 45 deletions(-) create mode 100644 modules/nf-core/gcta/makebksparse/tests/helpers/dense_grm/main.nf diff --git a/modules/nf-core/gcta/makebksparse/tests/helpers/dense_grm/main.nf b/modules/nf-core/gcta/makebksparse/tests/helpers/dense_grm/main.nf new file mode 100644 index 000000000000..deefd4ad119e --- /dev/null +++ b/modules/nf-core/gcta/makebksparse/tests/helpers/dense_grm/main.nf @@ -0,0 +1,33 @@ +process GCTA_TEST_DENSE_GRM { + tag "${meta.id}" + label "process_medium" + conda "${projectDir}/modules/nf-core/gcta/makebksparse/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' : + 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}" + + input: + tuple val(meta), path(bed), path(bim), path(fam) + + output: + tuple val(meta), path("${meta.id}.grm.id"), path("${meta.id}.grm.bin"), path("${meta.id}.grm.N.bin"), emit: dense_grm + + script: + def bfile_prefix = bed.baseName + """ + set -euo pipefail + + gcta \\ + --bfile "${bfile_prefix}" \\ + --make-grm \\ + --out "${meta.id}" \\ + --thread-num ${task.cpus} + """ + + stub: + """ + touch "${meta.id}.grm.id" + touch "${meta.id}.grm.bin" + touch "${meta.id}.grm.N.bin" + """ +} diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test b/modules/nf-core/gcta/makebksparse/tests/main.nf.test index ceacc1e5d8a1..cc8c4c63731c 100644 --- a/modules/nf-core/gcta/makebksparse/tests/main.nf.test +++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test @@ -8,6 +8,37 @@ nextflow_process { tag "modules_nfcore" tag "gcta" tag "gcta/makebksparse" + tag "tests/helpers/dense_grm" + + setup { + run("GCTA_TEST_DENSE_GRM", alias: "GCTA_TEST_DENSE_GRM_CONTRACT") { + script "../tests/helpers/dense_grm/main.nf" + process { + """ + input[0] = [ + [ id:'contract_dense' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + """ + } + } + + run("GCTA_TEST_DENSE_GRM", alias: "GCTA_TEST_DENSE_GRM_STUB") { + script "../tests/helpers/dense_grm/main.nf" + process { + """ + input[0] = [ + [ id:'stub_dense' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + """ + } + } + } test("homo_sapiens popgen - create sparse GRM") { config "./nextflow.config" @@ -15,22 +46,7 @@ nextflow_process { when { process { """ - file('contract_dense.grm.id').text = "fid1 iid1\\n" - - grmBuffer = java.nio.ByteBuffer.allocate(4).order(java.nio.ByteOrder.LITTLE_ENDIAN) - grmBuffer.putFloat(1.0f) - file('contract_dense.grm.bin').bytes = grmBuffer.array() - - grmNBuffer = java.nio.ByteBuffer.allocate(4).order(java.nio.ByteOrder.LITTLE_ENDIAN) - grmNBuffer.putFloat(100.0f) - file('contract_dense.grm.N.bin').bytes = grmNBuffer.array() - - input[0] = Channel.value([ - [ id:'contract_dense' ], - file('contract_dense.grm.id'), - file('contract_dense.grm.bin'), - file('contract_dense.grm.N.bin') - ]) + input[0] = GCTA_TEST_DENSE_GRM_CONTRACT.out.dense_grm input[1] = Channel.value(0.05) """ } @@ -53,22 +69,9 @@ nextflow_process { when { process { """ - file('contract_dense.grm.id').text = "fid1 iid1\\n" - - grmBuffer = java.nio.ByteBuffer.allocate(4).order(java.nio.ByteOrder.LITTLE_ENDIAN) - grmBuffer.putFloat(1.0f) - file('contract_dense.grm.bin').bytes = grmBuffer.array() - - grmNBuffer = java.nio.ByteBuffer.allocate(4).order(java.nio.ByteOrder.LITTLE_ENDIAN) - grmNBuffer.putFloat(100.0f) - file('contract_dense.grm.N.bin').bytes = grmNBuffer.array() - - input[0] = Channel.value([ - [ id:'contract_dense_mismatch' ], - file('contract_dense.grm.id'), - file('contract_dense.grm.bin'), - file('contract_dense.grm.N.bin') - ]) + input[0] = GCTA_TEST_DENSE_GRM_CONTRACT.out.dense_grm.map { meta, grm_id, grm_bin, grm_n_bin -> + [[ id:'contract_dense_mismatch' ], grm_id, grm_bin, grm_n_bin] + } input[1] = Channel.value(0.05) """ } @@ -89,16 +92,7 @@ nextflow_process { when { process { """ - file('stub_dense.grm.id').text = "fid1 iid1\\n" - file('stub_dense.grm.bin').bytes = [] - file('stub_dense.grm.N.bin').bytes = [] - - input[0] = Channel.value([ - [ id:'stub_dense' ], - file('stub_dense.grm.id'), - file('stub_dense.grm.bin'), - file('stub_dense.grm.N.bin') - ]) + input[0] = GCTA_TEST_DENSE_GRM_STUB.out.dense_grm input[1] = Channel.value(0.05) """ } diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap index 593c3fd0549f..0cf695b718eb 100644 --- a/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap @@ -42,8 +42,8 @@ { "id": "contract_dense" }, - "contract_dense_sp.grm.id:md5,c1fd3a827b108cf8b749e4fced7b7a52", - "contract_dense_sp.grm.sp:md5,9c1256f576632d254861b63586e7a3da" + "contract_dense_sp.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9", + "contract_dense_sp.grm.sp:md5,1b78fe4b14c8690943d7687dd22ba85a" ] ] ], @@ -51,7 +51,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-21T00:30:38.025448523" + "timestamp": "2026-03-21T18:40:37.776832502" }, "stub_versions": { "content": [ From feb66fe85a8e847c1657ff8f6fd8303645be8214 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Sat, 21 Mar 2026 20:35:27 +0800 Subject: [PATCH 4/6] Use real makegrmpart in makebksparse tests --- .../tests/helpers/dense_grm/main.nf | 33 ---------- .../gcta/makebksparse/tests/main.nf.test | 65 ++++++++++++++----- .../gcta/makebksparse/tests/main.nf.test.snap | 16 ++--- 3 files changed, 55 insertions(+), 59 deletions(-) delete mode 100644 modules/nf-core/gcta/makebksparse/tests/helpers/dense_grm/main.nf diff --git a/modules/nf-core/gcta/makebksparse/tests/helpers/dense_grm/main.nf b/modules/nf-core/gcta/makebksparse/tests/helpers/dense_grm/main.nf deleted file mode 100644 index deefd4ad119e..000000000000 --- a/modules/nf-core/gcta/makebksparse/tests/helpers/dense_grm/main.nf +++ /dev/null @@ -1,33 +0,0 @@ -process GCTA_TEST_DENSE_GRM { - tag "${meta.id}" - label "process_medium" - conda "${projectDir}/modules/nf-core/gcta/makebksparse/environment.yml" - container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' : - 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}" - - input: - tuple val(meta), path(bed), path(bim), path(fam) - - output: - tuple val(meta), path("${meta.id}.grm.id"), path("${meta.id}.grm.bin"), path("${meta.id}.grm.N.bin"), emit: dense_grm - - script: - def bfile_prefix = bed.baseName - """ - set -euo pipefail - - gcta \\ - --bfile "${bfile_prefix}" \\ - --make-grm \\ - --out "${meta.id}" \\ - --thread-num ${task.cpus} - """ - - stub: - """ - touch "${meta.id}.grm.id" - touch "${meta.id}.grm.bin" - touch "${meta.id}.grm.N.bin" - """ -} diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test b/modules/nf-core/gcta/makebksparse/tests/main.nf.test index cc8c4c63731c..03118bc0ed49 100644 --- a/modules/nf-core/gcta/makebksparse/tests/main.nf.test +++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test @@ -8,33 +8,53 @@ nextflow_process { tag "modules_nfcore" tag "gcta" tag "gcta/makebksparse" - tag "tests/helpers/dense_grm" + tag "gcta/makegrmpart" setup { - run("GCTA_TEST_DENSE_GRM", alias: "GCTA_TEST_DENSE_GRM_CONTRACT") { - script "../tests/helpers/dense_grm/main.nf" + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_CONTRACT") { + script "../../makegrmpart/main.nf" process { """ + file('contract_dense.mbfile').text = 'plink_simulated\\n' + input[0] = [ - [ id:'contract_dense' ], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + [ id:'contract_dense', part_gcta_job:1, nparts_gcta:1 ], + file('contract_dense.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] ] + input[1] = [[ id:'all_variants' ], []] """ } } - run("GCTA_TEST_DENSE_GRM", alias: "GCTA_TEST_DENSE_GRM_STUB") { - script "../tests/helpers/dense_grm/main.nf" + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_STUB") { + script "../../makegrmpart/main.nf" process { """ + file('stub_dense.mbfile').text = 'plink_simulated\\n' + input[0] = [ - [ id:'stub_dense' ], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + [ id:'stub_dense', part_gcta_job:1, nparts_gcta:1 ], + file('stub_dense.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] ] + input[1] = [[ id:'all_variants' ], []] """ } } @@ -46,7 +66,10 @@ nextflow_process { when { process { """ - input[0] = GCTA_TEST_DENSE_GRM_CONTRACT.out.dense_grm + input[0] = GCTA_MAKEGRMPART_CONTRACT.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } input[1] = Channel.value(0.05) """ } @@ -56,7 +79,7 @@ nextflow_process { assertAll( { assert process.success }, { assert process.out.sparse_grm_files.size() == 1 }, - { assert process.out.sparse_grm_files.get(0).get(0).id == "contract_dense" }, + { assert process.out.sparse_grm_files.get(0).get(0).id == "contract_dense.part_1_1" }, { assert snapshot(process.out.sparse_grm_files).match("sparse_grm_files") }, { assert snapshot(process.out.findAll { key, val -> key.startsWith('versions') }).match("versions") } ) @@ -69,7 +92,10 @@ nextflow_process { when { process { """ - input[0] = GCTA_TEST_DENSE_GRM_CONTRACT.out.dense_grm.map { meta, grm_id, grm_bin, grm_n_bin -> + input[0] = GCTA_MAKEGRMPART_CONTRACT.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + }.map { meta, grm_id, grm_bin, grm_n_bin -> [[ id:'contract_dense_mismatch' ], grm_id, grm_bin, grm_n_bin] } input[1] = Channel.value(0.05) @@ -92,7 +118,10 @@ nextflow_process { when { process { """ - input[0] = GCTA_TEST_DENSE_GRM_STUB.out.dense_grm + input[0] = GCTA_MAKEGRMPART_STUB.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } input[1] = Channel.value(0.05) """ } @@ -102,7 +131,7 @@ nextflow_process { assertAll( { assert process.success }, { assert process.out.sparse_grm_files.size() == 1 }, - { assert process.out.sparse_grm_files.get(0).get(0).id == "stub_dense" }, + { assert process.out.sparse_grm_files.get(0).get(0).id == "stub_dense.part_1_1" }, { assert snapshot(process.out.sparse_grm_files).match("stub_sparse_grm_files") }, { assert snapshot(process.out.findAll { key, val -> key.startsWith('versions') }).match("stub_versions") } ) diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap index 0cf695b718eb..6cbf64c83810 100644 --- a/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap @@ -4,10 +4,10 @@ [ [ { - "id": "stub_dense" + "id": "stub_dense.part_1_1" }, - "stub_dense_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", - "stub_dense_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e" + "stub_dense.part_1_1_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "stub_dense.part_1_1_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e" ] ] ], @@ -15,7 +15,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-21T00:30:48.770909526" + "timestamp": "2026-03-21T20:34:14.13446136" }, "versions": { "content": [ @@ -40,10 +40,10 @@ [ [ { - "id": "contract_dense" + "id": "contract_dense.part_1_1" }, - "contract_dense_sp.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9", - "contract_dense_sp.grm.sp:md5,1b78fe4b14c8690943d7687dd22ba85a" + "contract_dense.part_1_1_sp.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9", + "contract_dense.part_1_1_sp.grm.sp:md5,1b78fe4b14c8690943d7687dd22ba85a" ] ] ], @@ -51,7 +51,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-21T18:40:37.776832502" + "timestamp": "2026-03-21T20:33:53.993675061" }, "stub_versions": { "content": [ From 43b59240c47996a55a6f3a32a522cb41d38605f8 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Sat, 16 May 2026 08:36:06 +0800 Subject: [PATCH 5/6] Refine gcta/makebksparse GRM input contract --- modules/nf-core/gcta/makebksparse/main.nf | 19 +-- modules/nf-core/gcta/makebksparse/meta.yml | 39 ++--- .../gcta/makebksparse/tests/main.nf.test | 37 ++--- .../gcta/makebksparse/tests/main.nf.test.snap | 20 ++- modules/nf-core/gcta/makegrm/environment.yml | 7 + modules/nf-core/gcta/makegrm/main.nf | 42 ++++++ modules/nf-core/gcta/makegrm/meta.yml | 91 +++++++++++ .../nf-core/gcta/makegrm/tests/main.nf.test | 142 ++++++++++++++++++ .../gcta/makegrm/tests/main.nf.test.snap | 103 +++++++++++++ .../gcta/makegrm/tests/nextflow.config | 3 + 10 files changed, 437 insertions(+), 66 deletions(-) create mode 100644 modules/nf-core/gcta/makegrm/environment.yml create mode 100644 modules/nf-core/gcta/makegrm/main.nf create mode 100644 modules/nf-core/gcta/makegrm/meta.yml create mode 100644 modules/nf-core/gcta/makegrm/tests/main.nf.test create mode 100644 modules/nf-core/gcta/makegrm/tests/main.nf.test.snap create mode 100644 modules/nf-core/gcta/makegrm/tests/nextflow.config diff --git a/modules/nf-core/gcta/makebksparse/main.nf b/modules/nf-core/gcta/makebksparse/main.nf index 08dde9742be8..96bc3b3e0e8c 100644 --- a/modules/nf-core/gcta/makebksparse/main.nf +++ b/modules/nf-core/gcta/makebksparse/main.nf @@ -2,32 +2,33 @@ process GCTA_MAKEBKSPARSE { tag "${meta.id}" label 'process_medium' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' : - 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46b0d05f0daa47561d87d2a9cac5e51edc2c78e26f1bbab439c688386241a274/data' + : 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}" input: - tuple val(meta), path(grm_id), path(grm_bin), path(grm_n_bin) - val(cutoff) + tuple val(meta), path(grm_files) + val cutoff output: - tuple val(meta), path("*_sp.grm.id"), path("*_sp.grm.sp"), emit: sparse_grm_files - tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions + tuple val(meta), path("*_sp.grm.*"), emit: sparse_grm_files + tuple val("${task.process}"), val("gcta"), eval("gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'"), emit: versions_gcta, topic: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def extra_args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ + gcta \\ --grm ${meta.id} \\ --make-bK-sparse ${cutoff} \\ --out ${prefix}_sp \\ --thread-num ${task.cpus} \\ - ${args} + ${extra_args} """ stub: diff --git a/modules/nf-core/gcta/makebksparse/meta.yml b/modules/nf-core/gcta/makebksparse/meta.yml index 1a2a2d9f600f..e386fc180e50 100644 --- a/modules/nf-core/gcta/makebksparse/meta.yml +++ b/modules/nf-core/gcta/makebksparse/meta.yml @@ -3,7 +3,9 @@ name: "gcta_makebksparse" description: Create a sparse GRM from a dense GRM for downstream fastGWA analyses keywords: - gcta + - genome-wide complex trait analysis - grm + - genetic relationship matrix - sparse - genetics tools: @@ -12,6 +14,8 @@ tools: homepage: "https://yanglab.westlake.edu.cn/software/gcta/" documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf" tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/" + licence: ["GPL-3.0-only"] + identifier: "biotools:gcta" input: - - meta: @@ -21,20 +25,10 @@ input: e.g. `[ id:'plink_simulated' ]` `meta.id` is required and is the dense GRM basename contract used by `gcta --grm`. Input files must therefore be staged as `.grm.id`, `.grm.bin`, and `.grm.N.bin`. - - grm_id: + - grm_files: type: file - description: Dense GRM sample identifier file - pattern: "*.grm.id" - ontologies: [] - - grm_bin: - type: file - description: Dense GRM binary matrix file - pattern: "*.grm.bin" - ontologies: [] - - grm_n_bin: - type: file - description: Dense GRM sample-count matrix file - pattern: "*.grm.N.bin" + description: Dense GRM sidecar files + pattern: "*.grm.{id,bin,N.bin}" ontologies: [] - cutoff: type: float @@ -48,15 +42,10 @@ output: Groovy map containing dense GRM metadata e.g. `[ id:'plink_simulated' ]` `meta.id` is preserved from the input dense GRM basename contract. - - "*_sp.grm.id": - type: file - description: Sparse GRM sample identifier file - pattern: "*_sp.grm.id" - ontologies: [] - - "*_sp.grm.sp": + - "*_sp.grm.*": type: file - description: Sparse GRM matrix file - pattern: "*_sp.grm.sp" + description: Sparse GRM sidecar files + pattern: "*_sp.grm.{id,sp}" ontologies: [] versions_gcta: - - "${task.process}": @@ -65,7 +54,7 @@ output: - "gcta": type: string description: The tool name - - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'": + - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'": type: eval description: The command used to retrieve the GCTA version @@ -77,11 +66,11 @@ topics: - gcta: type: string description: The tool name - - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//': + - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'": type: eval description: The command used to retrieve the GCTA version authors: - - "@andongni" + - "@lyh970817" maintainers: - - "@andongni" + - "@lyh970817" diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test b/modules/nf-core/gcta/makebksparse/tests/main.nf.test index 03118bc0ed49..f7a787eb3617 100644 --- a/modules/nf-core/gcta/makebksparse/tests/main.nf.test +++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test @@ -8,17 +8,17 @@ nextflow_process { tag "modules_nfcore" tag "gcta" tag "gcta/makebksparse" - tag "gcta/makegrmpart" + tag "gcta/makegrm" setup { - run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_CONTRACT") { - script "../../makegrmpart/main.nf" + run("GCTA_MAKEGRM", alias: "GCTA_MAKEGRM_CONTRACT") { + script "../../makegrm/main.nf" process { """ file('contract_dense.mbfile').text = 'plink_simulated\\n' input[0] = [ - [ id:'contract_dense', part_gcta_job:1, nparts_gcta:1 ], + [ id:'contract_dense' ], file('contract_dense.mbfile'), [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) @@ -30,19 +30,18 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) ] ] - input[1] = [[ id:'all_variants' ], []] """ } } - run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_STUB") { - script "../../makegrmpart/main.nf" + run("GCTA_MAKEGRM", alias: "GCTA_MAKEGRM_STUB") { + script "../../makegrm/main.nf" process { """ file('stub_dense.mbfile').text = 'plink_simulated\\n' input[0] = [ - [ id:'stub_dense', part_gcta_job:1, nparts_gcta:1 ], + [ id:'stub_dense' ], file('stub_dense.mbfile'), [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) @@ -54,7 +53,6 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) ] ] - input[1] = [[ id:'all_variants' ], []] """ } } @@ -66,10 +64,7 @@ nextflow_process { when { process { """ - input[0] = GCTA_MAKEGRMPART_CONTRACT.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> - def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job - [[ id:prefix ], grm_id, grm_bin, grm_n_bin] - } + input[0] = GCTA_MAKEGRM_CONTRACT.out.grm_files input[1] = Channel.value(0.05) """ } @@ -79,7 +74,7 @@ nextflow_process { assertAll( { assert process.success }, { assert process.out.sparse_grm_files.size() == 1 }, - { assert process.out.sparse_grm_files.get(0).get(0).id == "contract_dense.part_1_1" }, + { assert process.out.sparse_grm_files.get(0).get(0).id == "contract_dense" }, { assert snapshot(process.out.sparse_grm_files).match("sparse_grm_files") }, { assert snapshot(process.out.findAll { key, val -> key.startsWith('versions') }).match("versions") } ) @@ -92,11 +87,8 @@ nextflow_process { when { process { """ - input[0] = GCTA_MAKEGRMPART_CONTRACT.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> - def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job - [[ id:prefix ], grm_id, grm_bin, grm_n_bin] - }.map { meta, grm_id, grm_bin, grm_n_bin -> - [[ id:'contract_dense_mismatch' ], grm_id, grm_bin, grm_n_bin] + input[0] = GCTA_MAKEGRM_CONTRACT.out.grm_files.map { meta, grm_files -> + [[ id:'contract_dense_mismatch' ], grm_files] } input[1] = Channel.value(0.05) """ @@ -118,10 +110,7 @@ nextflow_process { when { process { """ - input[0] = GCTA_MAKEGRMPART_STUB.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> - def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job - [[ id:prefix ], grm_id, grm_bin, grm_n_bin] - } + input[0] = GCTA_MAKEGRM_STUB.out.grm_files input[1] = Channel.value(0.05) """ } @@ -131,7 +120,7 @@ nextflow_process { assertAll( { assert process.success }, { assert process.out.sparse_grm_files.size() == 1 }, - { assert process.out.sparse_grm_files.get(0).get(0).id == "stub_dense.part_1_1" }, + { assert process.out.sparse_grm_files.get(0).get(0).id == "stub_dense" }, { assert snapshot(process.out.sparse_grm_files).match("stub_sparse_grm_files") }, { assert snapshot(process.out.findAll { key, val -> key.startsWith('versions') }).match("stub_versions") } ) diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap index 6cbf64c83810..3ba0d27d80f0 100644 --- a/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap @@ -4,10 +4,12 @@ [ [ { - "id": "stub_dense.part_1_1" + "id": "stub_dense" }, - "stub_dense.part_1_1_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", - "stub_dense.part_1_1_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e" + [ + "stub_dense_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "stub_dense_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] ] ] ], @@ -15,7 +17,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-21T20:34:14.13446136" + "timestamp": "2026-05-15T21:12:21.136559698" }, "versions": { "content": [ @@ -40,10 +42,12 @@ [ [ { - "id": "contract_dense.part_1_1" + "id": "contract_dense" }, - "contract_dense.part_1_1_sp.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9", - "contract_dense.part_1_1_sp.grm.sp:md5,1b78fe4b14c8690943d7687dd22ba85a" + [ + "contract_dense_sp.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9", + "contract_dense_sp.grm.sp:md5,1b78fe4b14c8690943d7687dd22ba85a" + ] ] ] ], @@ -51,7 +55,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-21T20:33:53.993675061" + "timestamp": "2026-05-15T21:09:25.501833656" }, "stub_versions": { "content": [ diff --git a/modules/nf-core/gcta/makegrm/environment.yml b/modules/nf-core/gcta/makegrm/environment.yml new file mode 100644 index 000000000000..3e22ea7b9f20 --- /dev/null +++ b/modules/nf-core/gcta/makegrm/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gcta=1.94.1 diff --git a/modules/nf-core/gcta/makegrm/main.nf b/modules/nf-core/gcta/makegrm/main.nf new file mode 100644 index 000000000000..0bb78639e361 --- /dev/null +++ b/modules/nf-core/gcta/makegrm/main.nf @@ -0,0 +1,42 @@ +process GCTA_MAKEGRM { + tag "${meta.id}" + label 'process_medium' + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46b0d05f0daa47561d87d2a9cac5e51edc2c78e26f1bbab439c688386241a274/data' + : 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}" + + input: + tuple val(meta), path(mfile), path(bed_pgen), path(bim_pvar), path(fam_psam) + + output: + tuple val(meta), path("*.grm.*"), emit: grm_files + tuple val("${task.process}"), val("gcta"), eval("gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'"), emit: versions_gcta, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def extra_args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def genotype_files = bed_pgen instanceof List ? bed_pgen : [bed_pgen] + def genotype_extension = genotype_files[0].name.tokenize('.').last() + def multi_file_flag = genotype_extension == 'pgen' ? '--mpfile' : '--mbfile' + + """ + + gcta \\ + ${multi_file_flag} ${mfile} \\ + --make-grm \\ + --thread-num ${task.cpus} \\ + --out ${prefix} ${extra_args} + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.grm.id + touch ${prefix}.grm.bin + touch ${prefix}.grm.N.bin + """ +} diff --git a/modules/nf-core/gcta/makegrm/meta.yml b/modules/nf-core/gcta/makegrm/meta.yml new file mode 100644 index 000000000000..0c813dadada6 --- /dev/null +++ b/modules/nf-core/gcta/makegrm/meta.yml @@ -0,0 +1,91 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "gcta_makegrm" +description: Compute a whole dense GRM with GCTA +keywords: + - gcta + - genome-wide complex trait analysis + - grm + - genetic relationship matrix + - genetics +tools: + - "gcta": + description: "GCTA is a tool for genome-wide complex trait analysis." + homepage: "https://yanglab.westlake.edu.cn/software/gcta/" + documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf" + tool_dev_url: "https://github.com/jianyangqt/gcta" + licence: + - "GPL-3.0-only" + identifier: biotools:gcta + +input: + - - meta: + type: map + description: | + Groovy Map containing GRM sample metadata + e.g. `[ id:'gcta_grm' ]` + - mfile: + type: file + description: GCTA multi-input manifest consumed by `--mbfile` or + `--mpfile` + pattern: "*.{mbfile,mpfile,txt}" + ontologies: + - edam: "http://edamontology.org/format_2330" + - bed_pgen: + type: file + description: Collection of PLINK primary genotype files referenced by the + multi-input manifest + pattern: "*.{bed,pgen}" + ontologies: + - edam: "http://edamontology.org/format_3003" + - bim_pvar: + type: file + description: Collection of PLINK variant metadata files referenced by the + multi-input manifest + pattern: "*.{bim,pvar}" + ontologies: [] + - fam_psam: + type: file + description: Collection of PLINK sample metadata files referenced by the + multi-input manifest + pattern: "*.{fam,psam}" + ontologies: [] + +output: + grm_files: + - - meta: + type: map + description: | + Groovy Map containing GRM sample metadata + e.g. `[ id:'gcta_grm' ]` + - "*.grm.*": + type: file + description: Dense GRM sidecar files + pattern: "*.grm.{id,bin,N.bin}" + ontologies: [] + versions_gcta: + - - ${task.process}: + type: string + description: The process the version was collected from + - gcta: + type: string + description: The tool name + - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'": + type: eval + description: The command used to generate the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - gcta: + type: string + description: The tool name + - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'": + type: eval + description: The command used to generate the version of the tool + +authors: + - "@lyh970817" +maintainers: + - "@lyh970817" diff --git a/modules/nf-core/gcta/makegrm/tests/main.nf.test b/modules/nf-core/gcta/makegrm/tests/main.nf.test new file mode 100644 index 000000000000..e5c63233678b --- /dev/null +++ b/modules/nf-core/gcta/makegrm/tests/main.nf.test @@ -0,0 +1,142 @@ +nextflow_process { + + name "Test Process GCTA_MAKEGRM" + script "../main.nf" + process "GCTA_MAKEGRM" + + tag "modules" + tag "modules_nfcore" + tag "gcta" + tag "gcta/makegrm" + + test("homo_sapiens popgen - plink2") { + when { + process { + """ + file('gcta_grm.mpfile').text = 'plink_simulated plink_simulated.pgen plink_simulated.psam plink_simulated.pvar\\n' + + input[0] = [ + [ id:'gcta_grm' ], + file('gcta_grm.mpfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pgen', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pvar', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.psam', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.grm_files.size() == 1 }, + { assert process.out.grm_files.get(0).get(0).id == 'gcta_grm' }, + { assert process.out.grm_files.get(0).get(0).keySet() == ['id'] as Set }, + { assert process.out.grm_files.get(0).get(1).size() == 3 }, + { + assert process.out.grm_files.get(0).get(1).collect { file(it).name }.toSet() == [ + 'gcta_grm.grm.id', + 'gcta_grm.grm.bin', + 'gcta_grm.grm.N.bin' + ] as Set + }, + { assert file(path(process.out.grm_files.get(0).get(1)[0]).parent.toString() + '/.command.sh').text.contains('--make-grm') }, + { assert file(path(process.out.grm_files.get(0).get(1)[0]).parent.toString() + '/.command.sh').text.contains('--mpfile') }, + { + assert snapshot( + process.out.grm_files, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - plink1") { + when { + process { + """ + file('gcta_grm.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'gcta_grm_bed' ], + file('gcta_grm.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.grm_files.size() == 1 }, + { assert process.out.grm_files.get(0).get(0).id == 'gcta_grm_bed' }, + { assert process.out.grm_files.get(0).get(0).keySet() == ['id'] as Set }, + { assert process.out.grm_files.get(0).get(1).size() == 3 }, + { + assert process.out.grm_files.get(0).get(1).collect { file(it).name }.toSet() == [ + 'gcta_grm_bed.grm.id', + 'gcta_grm_bed.grm.bin', + 'gcta_grm_bed.grm.N.bin' + ] as Set + }, + { assert file(path(process.out.grm_files.get(0).get(1)[0]).parent.toString() + '/.command.sh').text.contains('--make-grm') }, + { assert file(path(process.out.grm_files.get(0).get(1)[0]).parent.toString() + '/.command.sh').text.contains('--mbfile') }, + { + assert snapshot( + process.out.grm_files, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - plink1 - stub") { + options "-stub" + + when { + process { + """ + file('gcta_grm.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'gcta_grm_bed' ], + file('gcta_grm.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/gcta/makegrm/tests/main.nf.test.snap b/modules/nf-core/gcta/makegrm/tests/main.nf.test.snap new file mode 100644 index 000000000000..fe928ae98079 --- /dev/null +++ b/modules/nf-core/gcta/makegrm/tests/main.nf.test.snap @@ -0,0 +1,103 @@ +{ + "homo_sapiens popgen - plink2": { + "content": [ + [ + [ + { + "id": "gcta_grm" + }, + "gcta_grm.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9", + "gcta_grm.grm.bin:md5,45f8dff14bda17d50009a21050572228", + "gcta_grm.grm.N.bin:md5,acaa43bbbf2253d392537a178ecf09a4" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_MAKEGRM", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-15T20:22:02.639433902" + }, + "homo_sapiens popgen - plink1": { + "content": [ + [ + [ + { + "id": "gcta_grm_bed" + }, + "gcta_grm_bed.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9", + "gcta_grm_bed.grm.bin:md5,45f8dff14bda17d50009a21050572228", + "gcta_grm_bed.grm.N.bin:md5,acaa43bbbf2253d392537a178ecf09a4" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_MAKEGRM", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-15T20:22:52.907133415" + }, + "homo_sapiens popgen - plink1 - stub": { + "content": [ + { + "0": [ + [ + { + "id": "gcta_grm_bed" + }, + "gcta_grm_bed.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "gcta_grm_bed.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "gcta_grm_bed.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "GCTA_MAKEGRM", + "gcta", + "1.94.1" + ] + ], + "grm_files": [ + [ + { + "id": "gcta_grm_bed" + }, + "gcta_grm_bed.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "gcta_grm_bed.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "gcta_grm_bed.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_gcta": [ + [ + "GCTA_MAKEGRM", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-15T20:23:43.032260106" + } +} \ No newline at end of file diff --git a/modules/nf-core/gcta/makegrm/tests/nextflow.config b/modules/nf-core/gcta/makegrm/tests/nextflow.config new file mode 100644 index 000000000000..de31e0218829 --- /dev/null +++ b/modules/nf-core/gcta/makegrm/tests/nextflow.config @@ -0,0 +1,3 @@ +params { + modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/" +} From afe48850ccc5bad65823df3be815c39c01fa8945 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Sat, 16 May 2026 18:35:09 +0800 Subject: [PATCH 6/6] Sync gcta/makegrm setup module --- .../gcta/makegrm/tests/main.nf.test.snap | 38 +++++++++++-------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/modules/nf-core/gcta/makegrm/tests/main.nf.test.snap b/modules/nf-core/gcta/makegrm/tests/main.nf.test.snap index fe928ae98079..f8fbe133d5a2 100644 --- a/modules/nf-core/gcta/makegrm/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/makegrm/tests/main.nf.test.snap @@ -6,9 +6,11 @@ { "id": "gcta_grm" }, - "gcta_grm.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9", - "gcta_grm.grm.bin:md5,45f8dff14bda17d50009a21050572228", - "gcta_grm.grm.N.bin:md5,acaa43bbbf2253d392537a178ecf09a4" + [ + "gcta_grm.grm.N.bin:md5,acaa43bbbf2253d392537a178ecf09a4", + "gcta_grm.grm.bin:md5,45f8dff14bda17d50009a21050572228", + "gcta_grm.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9" + ] ] ], { @@ -25,7 +27,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-05-15T20:22:02.639433902" + "timestamp": "2026-05-15T21:08:43.209734458" }, "homo_sapiens popgen - plink1": { "content": [ @@ -34,9 +36,11 @@ { "id": "gcta_grm_bed" }, - "gcta_grm_bed.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9", - "gcta_grm_bed.grm.bin:md5,45f8dff14bda17d50009a21050572228", - "gcta_grm_bed.grm.N.bin:md5,acaa43bbbf2253d392537a178ecf09a4" + [ + "gcta_grm_bed.grm.N.bin:md5,acaa43bbbf2253d392537a178ecf09a4", + "gcta_grm_bed.grm.bin:md5,45f8dff14bda17d50009a21050572228", + "gcta_grm_bed.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9" + ] ] ], { @@ -53,7 +57,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-05-15T20:22:52.907133415" + "timestamp": "2026-05-15T21:09:34.058651287" }, "homo_sapiens popgen - plink1 - stub": { "content": [ @@ -63,9 +67,11 @@ { "id": "gcta_grm_bed" }, - "gcta_grm_bed.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", - "gcta_grm_bed.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", - "gcta_grm_bed.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + [ + "gcta_grm_bed.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "gcta_grm_bed.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "gcta_grm_bed.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e" + ] ] ], "1": [ @@ -80,9 +86,11 @@ { "id": "gcta_grm_bed" }, - "gcta_grm_bed.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", - "gcta_grm_bed.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", - "gcta_grm_bed.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + [ + "gcta_grm_bed.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "gcta_grm_bed.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "gcta_grm_bed.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e" + ] ] ], "versions_gcta": [ @@ -98,6 +106,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-05-15T20:23:43.032260106" + "timestamp": "2026-05-15T21:10:21.024687128" } } \ No newline at end of file