diff --git a/modules/nf-core/gcta/addgrms/environment.yml b/modules/nf-core/gcta/addgrms/environment.yml
new file mode 100644
index 000000000000..3e22ea7b9f20
--- /dev/null
+++ b/modules/nf-core/gcta/addgrms/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::gcta=1.94.1
diff --git a/modules/nf-core/gcta/addgrms/main.nf b/modules/nf-core/gcta/addgrms/main.nf
new file mode 100644
index 000000000000..74e7c65959d7
--- /dev/null
+++ b/modules/nf-core/gcta/addgrms/main.nf
@@ -0,0 +1,52 @@
+// Combine the GRMs listed in an MGRM manifest into a single dense GRM
+// (`gcta --mgrm <manifest> --make-grm`).
+//
+// Input : meta map, the manifest (`mgrm_file`) and the GRM sidecar files it
+//         refers to (`grm_files`), all staged into the task directory.
+// Output: `${meta.id}.grm.{id,bin,N.bin}` on `combined_grm`; the GCTA version
+//         on the `versions` topic.
+process GCTA_ADDGRMS {
+    tag "${meta.id}"
+    label 'process_medium'
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' :
+        'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }"
+
+    input:
+    tuple val(meta), path(mgrm_file), path(grm_files)
+
+    output:
+    tuple val(meta), path("${meta.id}.grm.id"), path("${meta.id}.grm.bin"), path("${meta.id}.grm.N.bin"), emit: combined_grm
+    tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    // The merged GRM is written as ${meta.id}.grm.*; refuse to run when a staged
+    // input already carries one of those names, because gcta would then write
+    // over the upstream GRM (Nextflow stages inputs as symlinks by default).
+    def out_names = ['.grm.id', '.grm.bin', '.grm.N.bin'].collect { suffix -> "${meta.id}${suffix}".toString() }
+    def clashes = [grm_files].flatten().collect { grm -> grm.name }.findAll { fname -> fname in out_names }
+    if (clashes) {
+        error "GCTA_ADDGRMS: input and output names are the same (${clashes.join(', ')}); set a different meta.id for the combined GRM"
+    }
+
+    """
+    gcta \\
+        --mgrm ${mgrm_file} \\
+        --make-grm \\
+        --out "${meta.id}" \\
+        --thread-num ${task.cpus} \\
+        ${args}
+    """
+
+    stub:
+    """
+    touch "${meta.id}.grm.id"
+    touch "${meta.id}.grm.bin"
+    touch "${meta.id}.grm.N.bin"
+    """
+}
diff --git a/modules/nf-core/gcta/addgrms/meta.yml b/modules/nf-core/gcta/addgrms/meta.yml
new file mode 100644
index 000000000000..06b3d7afb7e6
--- /dev/null
+++ b/modules/nf-core/gcta/addgrms/meta.yml
@@ -0,0 +1,81 @@
+# yaml-language-server:
$schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "gcta_addgrms" +description: Combine multiple GRMs listed in an MGRM manifest into a single dense GRM +keywords: + - gcta + - grm + - genetics +tools: + - "gcta": + description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data." + homepage: "https://yanglab.westlake.edu.cn/software/gcta/" + documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf" + tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/" + +input: + - - meta: + type: map + description: | + Groovy map containing combined GRM metadata + e.g. `[ id:'plink_simulated' ]` + - mgrm_file: + type: file + description: MGRM manifest listing the GRM prefixes to combine + pattern: "*.mgrm" + ontologies: + - edam: "http://edamontology.org/format_2330" + - grm_files: + type: file + description: GRM sidecar files referenced by `mgrm_file` + pattern: "*" + ontologies: [] + +output: + combined_grm: + - - meta: + type: map + description: | + Groovy map containing combined GRM metadata + e.g. 
`[ id:'plink_simulated' ]` + - "${meta.id}.grm.id": + type: file + description: Combined GRM sample identifier file + pattern: "${meta.id}.grm.id" + ontologies: [] + - "${meta.id}.grm.bin": + type: file + description: Combined GRM binary matrix file + pattern: "${meta.id}.grm.bin" + ontologies: [] + - "${meta.id}.grm.N.bin": + type: file + description: Combined GRM sample-count matrix file + pattern: "${meta.id}.grm.N.bin" + ontologies: [] + versions_gcta: + - - "${task.process}": + type: string + description: The process the version was collected from + - "gcta": + type: string + description: The tool name + - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'": + type: eval + description: The command used to retrieve the GCTA version + +topics: + versions: + - - ${task.process}: + type: string + description: The process the version was collected from + - gcta: + type: string + description: The tool name + - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//': + type: eval + description: The command used to retrieve the GCTA version + +authors: + - "@andongni" +maintainers: + - "@andongni" diff --git a/modules/nf-core/gcta/addgrms/tests/main.nf.test b/modules/nf-core/gcta/addgrms/tests/main.nf.test new file mode 100644 index 000000000000..db96ae31cee2 --- /dev/null +++ b/modules/nf-core/gcta/addgrms/tests/main.nf.test @@ -0,0 +1,153 @@ +nextflow_process { + + name "Test Process GCTA_ADDGRMS" + script "../main.nf" + process "GCTA_ADDGRMS" + + tag "modules" + tag "modules_nfcore" + tag "gcta" + tag "gcta/addgrms" + tag "gcta/makegrmpart" + tag "gawk" + + setup { + run("GAWK", alias: "GAWK_COMPLEMENT_VARIANTS") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'plink_simulated_complement' ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_random_selected_snp.txt', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ] + ] + input[1] = Channel.of('FNR == NR { keep[\$1] = 1; next } !(\$2 in keep) { print \$2 }').collectFile(name:'complement_variants.awk') + input[2] = false + """ + } + } + + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_LDMS1") { + script "../../makegrmpart/main.nf" + process { + """ + file('plink_simulated.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'plink_simulated_ldms1', part_gcta_job:1, nparts_gcta:1 ], + file('plink_simulated.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + input[1] = [ + [ id:'plink_random_selected_snp' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_random_selected_snp.txt', checkIfExists: true) + ] + """ + } + } + + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_LDMS2") { + script "../../makegrmpart/main.nf" + process { + """ + file('plink_simulated.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'plink_simulated_ldms2', part_gcta_job:1, nparts_gcta:1 ], + file('plink_simulated.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + input[1] = GAWK_COMPLEMENT_VARIANTS.out.output + """ + } + } + } + + test("homo_sapiens popgen - merge dense GRMs from mgrm") { + config "./nextflow.config" + + when { + process { + """ + mgrm_file = 
Channel + .of('plink_simulated_ldms1.part_1_1\\nplink_simulated_ldms2.part_1_1') + .collectFile(name:'plink_simulated_ldms.mgrm', newLine: true) + + grm_files = GCTA_MAKEGRMPART_LDMS1.out.grm_files + .mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files) + .map { meta, grm_id, grm_bin, grm_n_bin -> [grm_id, grm_bin, grm_n_bin] } + .collect() + .map { rows -> rows.flatten() } + + input[0] = mgrm_file + .combine(grm_files) + .map { row -> [[ id:'plink_simulated_ldms' ], row[0], row[1..-1]] } + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.combined_grm.size() == 1 }, + { assert process.out.combined_grm.get(0).get(0).id == "plink_simulated_ldms" }, + { + assert snapshot( + process.out.combined_grm, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - merge dense GRMs from mgrm - stub") { + options "-stub" + config "./nextflow.config" + + when { + process { + """ + mgrm_file = Channel + .of('plink_simulated_ldms1.part_1_1\\nplink_simulated_ldms2.part_1_1') + .collectFile(name:'plink_simulated_ldms.mgrm', newLine: true) + + grm_files = GCTA_MAKEGRMPART_LDMS1.out.grm_files + .mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files) + .map { meta, grm_id, grm_bin, grm_n_bin -> [grm_id, grm_bin, grm_n_bin] } + .collect() + .map { rows -> rows.flatten() } + + input[0] = mgrm_file + .combine(grm_files) + .map { row -> [[ id:'plink_simulated_ldms' ], row[0], row[1..-1]] } + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/gcta/addgrms/tests/main.nf.test.snap b/modules/nf-core/gcta/addgrms/tests/main.nf.test.snap new file mode 100644 index 000000000000..74190b563549 --- /dev/null +++ b/modules/nf-core/gcta/addgrms/tests/main.nf.test.snap @@ -0,0 +1,75 @@ +{ + "homo_sapiens popgen - merge dense GRMs from mgrm - stub": { + "content": [ + { + "0": [ + [ + { + "id": "plink_simulated_ldms" + }, 
+ "plink_simulated_ldms.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_ldms.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_ldms.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "GCTA_ADDGRMS", + "gcta", + "1.94.1" + ] + ], + "combined_grm": [ + [ + { + "id": "plink_simulated_ldms" + }, + "plink_simulated_ldms.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_ldms.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_ldms.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_gcta": [ + [ + "GCTA_ADDGRMS", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T14:16:18.17102219" + }, + "homo_sapiens popgen - merge dense GRMs from mgrm": { + "content": [ + [ + [ + { + "id": "plink_simulated_ldms" + }, + "plink_simulated_ldms.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9", + "plink_simulated_ldms.grm.bin:md5,e6a56e44acd03f87043435c382fe0149", + "plink_simulated_ldms.grm.N.bin:md5,acaa43bbbf2253d392537a178ecf09a4" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_ADDGRMS", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T15:38:05.74494821" + } +} \ No newline at end of file diff --git a/modules/nf-core/gcta/addgrms/tests/nextflow.config b/modules/nf-core/gcta/addgrms/tests/nextflow.config new file mode 100644 index 000000000000..de31e0218829 --- /dev/null +++ b/modules/nf-core/gcta/addgrms/tests/nextflow.config @@ -0,0 +1,3 @@ +params { + modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/" +} diff --git a/modules/nf-core/gcta/adjustgrm/environment.yml b/modules/nf-core/gcta/adjustgrm/environment.yml new file mode 100644 index 000000000000..3e22ea7b9f20 --- /dev/null +++ b/modules/nf-core/gcta/adjustgrm/environment.yml @@ -0,0 
+1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::gcta=1.94.1
diff --git a/modules/nf-core/gcta/adjustgrm/main.nf b/modules/nf-core/gcta/adjustgrm/main.nf
new file mode 100644
index 000000000000..137011a9bf9e
--- /dev/null
+++ b/modules/nf-core/gcta/adjustgrm/main.nf
@@ -0,0 +1,53 @@
+// Adjust a dense GRM with `gcta --grm <prefix> --grm-adj <value> --make-grm`.
+//
+// Input : meta map plus the three GRM sidecar files, and `grm_adj`, the value
+//         handed to `--grm-adj` (null or '' falls back to 0).
+// Output: `${meta.id}_adj.grm.{id,bin,N.bin}` on `grm_files`; the GCTA version
+//         on the `versions` topic.
+process GCTA_ADJUSTGRM {
+    tag "${meta.id}"
+    label 'process_medium'
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' :
+        'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }"
+
+    input:
+    tuple val(meta), path(grm_id), path(grm_bin), path(grm_n_bin)
+    val grm_adj
+
+    output:
+    tuple val(meta), path("${meta.id}_adj.grm.id"), path("${meta.id}_adj.grm.bin"), path("${meta.id}_adj.grm.N.bin"), emit: grm_files
+    tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def grm_adj_value = (grm_adj == null || grm_adj == '') ? 0 : grm_adj
+    // gcta addresses a GRM by file prefix. Read the prefix off the staged
+    // `*.grm.id` file rather than assuming the files are named after meta.id.
+    def grm_prefix = grm_id.name.replaceFirst(/\.grm\.id$/, '')
+    // The adjusted GRM is written as ${meta.id}_adj.grm.*; that must not be the input itself.
+    if (grm_prefix == "${meta.id}_adj".toString()) {
+        error "GCTA_ADJUSTGRM: input and output names are the same (${grm_prefix}.grm.*); set a different meta.id for the adjusted GRM"
+    }
+
+    """
+    gcta \\
+        --grm "${grm_prefix}" \\
+        --grm-adj ${grm_adj_value} \\
+        --make-grm \\
+        --out "${meta.id}_adj" \\
+        --thread-num ${task.cpus} \\
+        ${args}
+    """
+
+    stub:
+    """
+    touch "${meta.id}_adj.grm.id"
+    touch "${meta.id}_adj.grm.bin"
+    touch "${meta.id}_adj.grm.N.bin"
+    """
+}
diff --git a/modules/nf-core/gcta/adjustgrm/meta.yml b/modules/nf-core/gcta/adjustgrm/meta.yml
new file mode 100644
index 000000000000..6d093af721e4
--- /dev/null
+++ b/modules/nf-core/gcta/adjustgrm/meta.yml
@@ -0,0 +1,90 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "gcta_adjustgrm"
+description: Adjust a dense GRM for incomplete tagging using `gcta --grm-adj`
+keywords:
+  - gcta
+  - grm
+  - genetics
+tools:
+  - "gcta":
+      description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data."
+      homepage: "https://yanglab.westlake.edu.cn/software/gcta/"
+      documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf"
+      tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/"
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy map containing dense GRM metadata
+          e.g. `[ id:'plink_simulated' ]`
+    - grm_id:
+        type: file
+        description: Dense GRM sample identifier file
+        pattern: "*.grm.id"
+        ontologies: []
+    - grm_bin:
+        type: file
+        description: Dense GRM binary matrix file
+        pattern: "*.grm.bin"
+        ontologies: []
+    - grm_n_bin:
+        type: file
+        description: Dense GRM sample-count matrix file
+        pattern: "*.grm.N.bin"
+        ontologies: []
+  - grm_adj:
+      type: integer
+      description: |
+        GRM adjustment value passed to `--grm-adj`.
+        When an empty string is supplied, the module falls back to `0`.
+
+output:
+  grm_files:
+    - - meta:
+          type: map
+          description: |
+            Groovy map containing dense GRM metadata
+            e.g.
`[ id:'plink_simulated' ]` + - "${meta.id}_adj.grm.id": + type: file + description: Adjusted GRM sample identifier file + pattern: "${meta.id}_adj.grm.id" + ontologies: [] + - "${meta.id}_adj.grm.bin": + type: file + description: Adjusted GRM binary matrix file + pattern: "${meta.id}_adj.grm.bin" + ontologies: [] + - "${meta.id}_adj.grm.N.bin": + type: file + description: Adjusted GRM sample-count matrix file + pattern: "${meta.id}_adj.grm.N.bin" + ontologies: [] + versions_gcta: + - - "${task.process}": + type: string + description: The process the version was collected from + - "gcta": + type: string + description: The tool name + - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'": + type: eval + description: The command used to retrieve the GCTA version + +topics: + versions: + - - ${task.process}: + type: string + description: The process the version was collected from + - gcta: + type: string + description: The tool name + - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//': + type: eval + description: The command used to retrieve the GCTA version + +authors: + - "@andongni" +maintainers: + - "@andongni" diff --git a/modules/nf-core/gcta/adjustgrm/tests/main.nf.test b/modules/nf-core/gcta/adjustgrm/tests/main.nf.test new file mode 100644 index 000000000000..947453dc0866 --- /dev/null +++ b/modules/nf-core/gcta/adjustgrm/tests/main.nf.test @@ -0,0 +1,128 @@ +nextflow_process { + + name "Test Process GCTA_ADJUSTGRM" + script "../main.nf" + process "GCTA_ADJUSTGRM" + + tag "modules" + tag "modules_nfcore" + tag "gcta" + tag "gcta/adjustgrm" + tag "gcta/makegrmpart" + + setup { + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_DENSE") { + script "../../makegrmpart/main.nf" + process { + """ + file('plink_simulated.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'plink_simulated_dense', part_gcta_job:1, nparts_gcta:1 ], + file('plink_simulated.mbfile'), + [ + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + input[1] = [[ id:'all_variants' ], []] + """ + } + } + } + + test("homo_sapiens popgen - adjust dense GRM") { + config "./nextflow.config" + + when { + process { + """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = dense_grm + input[1] = 1 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.grm_files.size() == 1 }, + { assert process.out.grm_files.get(0).get(0).id == "plink_simulated_dense.part_1_1" }, + { + assert snapshot( + process.out.grm_files, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - adjust dense GRM with fallback default") { + config "./nextflow.config" + + when { + process { + """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = dense_grm + input[1] = '' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.grm_files.size() == 1 }, + { assert process.out.grm_files.get(0).get(0).id == "plink_simulated_dense.part_1_1" }, + { + assert snapshot( + process.out.grm_files, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - adjust dense GRM - stub") { + options "-stub" + config 
"./nextflow.config" + + when { + process { + """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = dense_grm + input[1] = 1 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/gcta/adjustgrm/tests/main.nf.test.snap b/modules/nf-core/gcta/adjustgrm/tests/main.nf.test.snap new file mode 100644 index 000000000000..47867f8dcb57 --- /dev/null +++ b/modules/nf-core/gcta/adjustgrm/tests/main.nf.test.snap @@ -0,0 +1,103 @@ +{ + "homo_sapiens popgen - adjust dense GRM - stub": { + "content": [ + { + "0": [ + [ + { + "id": "plink_simulated_dense.part_1_1" + }, + "plink_simulated_dense.part_1_1_adj.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_dense.part_1_1_adj.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_dense.part_1_1_adj.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "GCTA_ADJUSTGRM", + "gcta", + "1.94.1" + ] + ], + "grm_files": [ + [ + { + "id": "plink_simulated_dense.part_1_1" + }, + "plink_simulated_dense.part_1_1_adj.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_dense.part_1_1_adj.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_dense.part_1_1_adj.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_gcta": [ + [ + "GCTA_ADJUSTGRM", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T15:35:38.715590031" + }, + "homo_sapiens popgen - adjust dense GRM": { + "content": [ + [ + [ + { + "id": "plink_simulated_dense.part_1_1" + }, + "plink_simulated_dense.part_1_1_adj.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9", + "plink_simulated_dense.part_1_1_adj.grm.bin:md5,2daf6b143fde26dfe8e340237443ffaf", + 
"plink_simulated_dense.part_1_1_adj.grm.N.bin:md5,acaa43bbbf2253d392537a178ecf09a4" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_ADJUSTGRM", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-16T23:53:57.519973105" + }, + "homo_sapiens popgen - adjust dense GRM with fallback default": { + "content": [ + [ + [ + { + "id": "plink_simulated_dense.part_1_1" + }, + "plink_simulated_dense.part_1_1_adj.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9", + "plink_simulated_dense.part_1_1_adj.grm.bin:md5,6d16a365bd94b621963769e8314eeaa0", + "plink_simulated_dense.part_1_1_adj.grm.N.bin:md5,acaa43bbbf2253d392537a178ecf09a4" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_ADJUSTGRM", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-17T00:00:08.732593482" + } +} \ No newline at end of file diff --git a/modules/nf-core/gcta/adjustgrm/tests/nextflow.config b/modules/nf-core/gcta/adjustgrm/tests/nextflow.config new file mode 100644 index 000000000000..de31e0218829 --- /dev/null +++ b/modules/nf-core/gcta/adjustgrm/tests/nextflow.config @@ -0,0 +1,3 @@ +params { + modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/" +} diff --git a/modules/nf-core/gcta/bivariatereml/environment.yml b/modules/nf-core/gcta/bivariatereml/environment.yml new file mode 100644 index 000000000000..3e22ea7b9f20 --- /dev/null +++ b/modules/nf-core/gcta/bivariatereml/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gcta=1.94.1 diff --git a/modules/nf-core/gcta/bivariatereml/main.nf b/modules/nf-core/gcta/bivariatereml/main.nf new file mode 100644 index 000000000000..039b7f295ad0 --- /dev/null 
+++ b/modules/nf-core/gcta/bivariatereml/main.nf
@@ -0,0 +1,57 @@
+// Bivariate REML on one dense GRM (`gcta --reml-bivar 1 2 --grm <prefix>`).
+//
+// Input : phenotype file (meta), dense GRM sidecar files (meta2), and the
+//         quantitative (meta3) and categorical (meta4) covariate files; an
+//         empty covariate input drops the matching `--qcovar` / `--covar` flag.
+// Output: `${meta.id}.hsq` on `bivariate_results`, `${meta.id}.log` on
+//         `log_file`; the GCTA version on the `versions` topic.
+process GCTA_BIVARIATEREML {
+    tag "bivariate_reml_${meta.id}_${meta2.id}"
+    label 'process_medium'
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' :
+        'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }"
+
+    input:
+    tuple val(meta), path(phenotype_file)
+    tuple val(meta2), path(grm_id), path(grm_bin), path(grm_n_bin)
+    tuple val(meta3), path(quant_covariates_file)
+    tuple val(meta4), path(cat_covariates_file)
+
+    output:
+    tuple val(meta), path("${meta.id}.hsq"), emit: bivariate_results
+    tuple val(meta), path("${meta.id}.log"), emit: log_file
+    tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def pair_id = meta.id
+    def qcovar_param = quant_covariates_file ? "--qcovar ${quant_covariates_file}" : ''
+    def covar_param = cat_covariates_file ? "--covar ${cat_covariates_file}" : ''
+    def extra_args = task.ext.args ?: ''
+    // gcta addresses a GRM by file prefix. Read the prefix off the staged
+    // `*.grm.id` file rather than assuming the files are named after meta2.id.
+    def grm_prefix = grm_id.name.replaceFirst(/\.grm\.id$/, '')
+
+    """
+    set -euo pipefail
+
+    gcta \\
+        --reml-bivar 1 2 \\
+        --grm "${grm_prefix}" \\
+        --pheno "${phenotype_file}" \\
+        ${qcovar_param} \\
+        ${covar_param} \\
+        --out "${pair_id}" \\
+        --thread-num ${task.cpus} ${extra_args}
+    """
+
+    stub:
+    """
+    touch "${meta.id}.hsq"
+    touch "${meta.id}.log"
+    """
+}
diff --git a/modules/nf-core/gcta/bivariatereml/meta.yml b/modules/nf-core/gcta/bivariatereml/meta.yml
new file mode 100644
index 000000000000..7b64fd3a392c
--- /dev/null
+++ b/modules/nf-core/gcta/bivariatereml/meta.yml
@@ -0,0 +1,118 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "gcta_bivariatereml"
+description: Run bivariate REML analysis with a single dense GRM
+keywords:
+  - gcta
+  - reml
+  - bivariate
+  - genetics
+tools:
+  - "gcta":
+      description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data."
+      homepage: "https://yanglab.westlake.edu.cn/software/gcta/"
+      documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf"
+      tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/"
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy map containing shared bivariate phenotype metadata
+          `meta.id` must contain both trait names, for example `trait1__trait2`
+    - phenotype_file:
+        type: file
+        description: Shared bivariate phenotype file passed to `--pheno`
+        pattern: "*.{phe,pheno,txt,tsv}"
+        ontologies:
+          - edam: "http://edamontology.org/format_3475"
+  - - meta2:
+        type: map
+        description: |
+          Groovy map containing dense GRM metadata
+          e.g.
`[ id:'plink_simulated' ]` + - grm_id: + type: file + description: Dense GRM sample identifier file + pattern: "*.grm.id" + ontologies: [] + - grm_bin: + type: file + description: Dense GRM binary matrix file + pattern: "*.grm.bin" + ontologies: [] + - grm_n_bin: + type: file + description: Dense GRM sample-count matrix file + pattern: "*.grm.N.bin" + ontologies: [] + - - meta3: + type: map + description: | + Groovy map containing quantitative covariate metadata + e.g. `[ id:'covariates_quant' ]` + - quant_covariates_file: + type: file + description: Quantitative covariates file, pass `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" + - - meta4: + type: map + description: | + Groovy map containing categorical covariate metadata + e.g. `[ id:'covariates_cat' ]` + - cat_covariates_file: + type: file + description: Categorical covariates file, pass `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" +output: + bivariate_results: + - - meta: + type: map + description: | + Groovy map containing shared bivariate phenotype metadata + `meta.id` must contain both trait names, for example `trait1__trait2` + - "${meta.id}.hsq": + type: file + description: Bivariate REML result file + pattern: "${meta.id}.hsq" + ontologies: + - edam: "http://edamontology.org/format_2330" + log_file: + - - meta: + type: map + description: | + Groovy map containing shared bivariate phenotype metadata + `meta.id` must contain both trait names, for example `trait1__trait2` + - "${meta.id}.log": + type: file + description: Bivariate REML log file + pattern: "${meta.id}.log" + ontologies: + - edam: "http://edamontology.org/format_2330" + versions_gcta: + - - "${task.process}": + type: string + description: The process the version was collected from + - "gcta": + type: string + description: The tool name + - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 
| sed 's/^v//'": + type: eval + description: The command used to retrieve the GCTA version +topics: + versions: + - - ${task.process}: + type: string + description: The process the version was collected from + - gcta: + type: string + description: The tool name + - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//': + type: eval + description: The command used to retrieve the GCTA version +authors: + - "@andongni" +maintainers: + - "@andongni" diff --git a/modules/nf-core/gcta/bivariatereml/tests/main.nf.test b/modules/nf-core/gcta/bivariatereml/tests/main.nf.test new file mode 100644 index 000000000000..442abd673f1f --- /dev/null +++ b/modules/nf-core/gcta/bivariatereml/tests/main.nf.test @@ -0,0 +1,147 @@ +nextflow_process { + name "Test Process GCTA_BIVARIATEREML" + script "../main.nf" + process "GCTA_BIVARIATEREML" + + tag "modules" + tag "modules_nfcore" + tag "gcta" + tag "gcta/bivariatereml" + tag "gcta/makegrmpart" + tag "gawk" + + setup { + run("GAWK", alias: "GAWK_BIVARIATE_PHENO") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'Trait1__Trait2' ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + ] + input[1] = Channel.of('FNR == NR { if (FNR == 1) next; trait1[\$2] = \$3; next } FNR == 1 { next } { print \$1, \$2, trait1[\$2], \$4 }').collectFile(name:'bivariate_phenotype.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_QUANTITATIVE_COVARIATES") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'covariates_quant' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$5, \$6 
}').collectFile(name:'quantitative_covariates.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_CATEGORICAL_COVARIATES") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'covariates_cat' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3 }').collectFile(name:'categorical_covariates.awk') + input[2] = false + """ + } + } + + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_DENSE") { + script "../../makegrmpart/main.nf" + process { + """ + file('plink_simulated.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'plink_simulated_dense', part_gcta_job:1, nparts_gcta:1 ], + file('plink_simulated.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + input[1] = [[ id:'all_variants' ], []] + """ + } + } + } + + test("homo_sapiens popgen - bivariate phenotype with dense GRM and covariates") { + config "./nextflow.config" + when { + process { + """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = GAWK_BIVARIATE_PHENO.out.output + input[1] = dense_grm + input[2] = GAWK_QUANTITATIVE_COVARIATES.out.output + input[3] = GAWK_CATEGORICAL_COVARIATES.out.output + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.bivariate_results.size() == 1 }, + { assert process.out.log_file.size() == 1 }, + { assert 
process.out.bivariate_results.get(0).get(0).id == "Trait1__Trait2" }, + { assert file(process.out.log_file.get(0).get(1)).name == "Trait1__Trait2.log" }, + { assert file(process.out.log_file.get(0).get(1)).exists() }, + { + assert snapshot( + process.out.bivariate_results, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - bivariate phenotype with dense GRM - stub") { + options "-stub" + config "./nextflow.config" + + when { + process { + """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = GAWK_BIVARIATE_PHENO.out.output + input[1] = dense_grm + input[2] = [[ id:'covariates_quant' ], []] + input[3] = [[ id:'covariates_cat' ], []] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/gcta/bivariatereml/tests/main.nf.test.snap b/modules/nf-core/gcta/bivariatereml/tests/main.nf.test.snap new file mode 100644 index 000000000000..d16589847d48 --- /dev/null +++ b/modules/nf-core/gcta/bivariatereml/tests/main.nf.test.snap @@ -0,0 +1,85 @@ +{ + "homo_sapiens popgen - bivariate phenotype with dense GRM and covariates": { + "content": [ + [ + [ + { + "id": "Trait1__Trait2" + }, + "Trait1__Trait2.hsq:md5,dab8c6af3e42c9e359825f8f7e6e6fce" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_BIVARIATEREML", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-18T19:00:59.97128964" + }, + "homo_sapiens popgen - bivariate phenotype with dense GRM - stub": { + "content": [ + { + "0": [ + [ + { + "id": "Trait1__Trait2" + }, + "Trait1__Trait2.hsq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "Trait1__Trait2" + }, + 
"Trait1__Trait2.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + "GCTA_BIVARIATEREML", + "gcta", + "1.94.1" + ] + ], + "bivariate_results": [ + [ + { + "id": "Trait1__Trait2" + }, + "Trait1__Trait2.hsq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_file": [ + [ + { + "id": "Trait1__Trait2" + }, + "Trait1__Trait2.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_gcta": [ + [ + "GCTA_BIVARIATEREML", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T13:53:21.898185124" + } +} \ No newline at end of file diff --git a/modules/nf-core/gcta/bivariatereml/tests/nextflow.config b/modules/nf-core/gcta/bivariatereml/tests/nextflow.config new file mode 100644 index 000000000000..de31e0218829 --- /dev/null +++ b/modules/nf-core/gcta/bivariatereml/tests/nextflow.config @@ -0,0 +1,3 @@ +params { + modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/" +} diff --git a/modules/nf-core/gcta/bivariateremlldms/environment.yml b/modules/nf-core/gcta/bivariateremlldms/environment.yml new file mode 100644 index 000000000000..3e22ea7b9f20 --- /dev/null +++ b/modules/nf-core/gcta/bivariateremlldms/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gcta=1.94.1 diff --git a/modules/nf-core/gcta/bivariateremlldms/main.nf b/modules/nf-core/gcta/bivariateremlldms/main.nf new file mode 100644 index 000000000000..fd34a7a5699c --- /dev/null +++ b/modules/nf-core/gcta/bivariateremlldms/main.nf @@ -0,0 +1,52 @@ +// Run a bivariate REML analysis (`gcta --reml-bivar 1 2`) over the GRMs listed in an MGRM manifest. +// Writes `<meta.id>.hsq` and `<meta.id>.log`; each covariate input is optional. +process GCTA_BIVARIATEREMLLDMS { + tag "bivariate_reml_ldms_${meta.id}_${meta2.id}" + label 'process_medium' + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && 
!task.ext.singularity_pull_docker_container ? + 'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' : + 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }" + + input: + tuple val(meta), path(phenotype_file) + tuple val(meta2), path(mgrm_file), path(grm_files) + tuple val(meta3), path(quant_covariates_file) + tuple val(meta4), path(cat_covariates_file) + + output: + tuple val(meta), path("${meta.id}.hsq"), emit: bivariate_results + tuple val(meta), path("${meta.id}.log"), emit: log_file + tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + // An absent covariate input (`[]`) is falsy, so its flag is dropped from the command. + def qcovar_arg = quant_covariates_file ? "--qcovar ${quant_covariates_file}" : '' + def covar_arg = cat_covariates_file ? "--covar ${cat_covariates_file}" : '' + def out_prefix = meta.id + def args = task.ext.args ?: '' + + """ + set -euo pipefail + + gcta \\ + --reml-bivar 1 2 \\ + --mgrm ${mgrm_file} \\ + --pheno "${phenotype_file}" \\ + ${qcovar_arg} \\ + ${covar_arg} \\ + --reml-bivar-no-constrain \\ + --reml-maxit 500 \\ + --out "${out_prefix}" \\ + --thread-num ${task.cpus} ${args} + """ + + stub: + """ + touch "${meta.id}.hsq" + touch "${meta.id}.log" + """ +} diff --git a/modules/nf-core/gcta/bivariateremlldms/meta.yml b/modules/nf-core/gcta/bivariateremlldms/meta.yml new file mode 100644 index 000000000000..1cfafb2a1320 --- /dev/null +++ b/modules/nf-core/gcta/bivariateremlldms/meta.yml @@ -0,0 +1,115 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "gcta_bivariateremlldms" +description: Run bivariate REML-LDMS analysis with an MGRM manifest +keywords: + - gcta + - reml + - bivariate + - ldms + - genetics +tools: + - "gcta": + description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, 
variance components, and association statistics from genome-wide data." + homepage: "https://yanglab.westlake.edu.cn/software/gcta/" + documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf" + tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/" +input: + - - meta: + type: map + description: | + Groovy map containing shared bivariate phenotype metadata + `meta.id` must contain both trait names, for example `trait1__trait2` + - phenotype_file: + type: file + description: Shared bivariate phenotype file passed to `--pheno` + pattern: "*.{phe,pheno,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" + - - meta2: + type: map + description: | + Groovy map containing MGRM metadata + e.g. `[ id:'plink_simulated_ldms' ]` + - mgrm_file: + type: file + description: MGRM manifest file + pattern: "*.mgrm" + ontologies: + - edam: "http://edamontology.org/format_2330" + - grm_files: + type: file + description: GRM sidecar files referenced by `mgrm_file` + pattern: "*" + ontologies: [] + - - meta3: + type: map + description: | + Groovy map containing quantitative covariate metadata + e.g. `[ id:'covariates_quant' ]` + - quant_covariates_file: + type: file + description: Quantitative covariates file, pass `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" + - - meta4: + type: map + description: | + Groovy map containing categorical covariate metadata + e.g. 
`[ id:'covariates_cat' ]` + - cat_covariates_file: + type: file + description: Categorical covariates file, pass `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" +output: + bivariate_results: + - - meta: + type: map + description: | + Groovy map containing shared bivariate phenotype metadata + `meta.id` must contain both trait names, for example `trait1__trait2` + - "${meta.id}.hsq": + type: file + description: Bivariate REML-LDMS result file + pattern: "${meta.id}.hsq" + ontologies: + - edam: "http://edamontology.org/format_2330" + log_file: + - - meta: + type: map + description: | + Groovy map containing shared bivariate phenotype metadata + `meta.id` must contain both trait names, for example `trait1__trait2` + - "${meta.id}.log": + type: file + description: Bivariate REML-LDMS log file + pattern: "${meta.id}.log" + ontologies: + - edam: "http://edamontology.org/format_2330" + versions_gcta: + - - "${task.process}": + type: string + description: The process the version was collected from + - "gcta": + type: string + description: The tool name + - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'": + type: eval + description: The command used to retrieve the GCTA version +topics: + versions: + - - ${task.process}: + type: string + description: The process the version was collected from + - gcta: + type: string + description: The tool name + - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//': + type: eval + description: The command used to retrieve the GCTA version +authors: + - "@andongni" +maintainers: + - "@andongni" diff --git a/modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test b/modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test new file mode 100644 index 000000000000..b4af8b8c6d08 --- /dev/null +++ b/modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test @@ -0,0 +1,212 @@ +nextflow_process { + name "Test Process 
GCTA_BIVARIATEREMLLDMS" + script "../main.nf" + process "GCTA_BIVARIATEREMLLDMS" + + tag "modules" + tag "modules_nfcore" + tag "gcta" + tag "gcta/bivariateremlldms" + tag "gcta/makegrmpart" + tag "gawk" + + setup { + run("GAWK", alias: "GAWK_BIVARIATE_PHENO") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'Trait1__Trait2' ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + ] + input[1] = Channel.of('FNR == NR { if (FNR == 1) next; trait1[\$2] = \$3; next } FNR == 1 { next } { print \$1, \$2, trait1[\$2], \$4 }').collectFile(name:'bivariate_phenotype.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_QUANTITATIVE_COVARIATES") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'covariates_quant' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$5, \$6 }').collectFile(name:'quantitative_covariates.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_CATEGORICAL_COVARIATES") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'covariates_cat' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3 }').collectFile(name:'categorical_covariates.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_COMPLEMENT_VARIANTS") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'plink_simulated_complement' ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_random_selected_snp.txt', 
checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ] + ] + input[1] = Channel.of('FNR == NR { keep[\$1] = 1; next } !(\$2 in keep) { print \$2 }').collectFile(name:'complement_variants.awk') + input[2] = false + """ + } + } + + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_LDMS1") { + script "../../makegrmpart/main.nf" + process { + """ + file('plink_simulated.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'plink_simulated_ldms1', part_gcta_job:1, nparts_gcta:1 ], + file('plink_simulated.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + input[1] = [ + [ id:'plink_random_selected_snp' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_random_selected_snp.txt', checkIfExists: true) + ] + """ + } + } + + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_LDMS2") { + script "../../makegrmpart/main.nf" + process { + """ + file('plink_simulated.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'plink_simulated_ldms2', part_gcta_job:1, nparts_gcta:1 ], + file('plink_simulated.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + input[1] = GAWK_COMPLEMENT_VARIANTS.out.output + """ + } + } + } + + test("homo_sapiens popgen - bivariate phenotype with ldms mgrm and 
covariates") { + config "./nextflow.config" + when { + process { + """ + mgrm_file = Channel + .of('plink_simulated_ldms1.part_1_1\\nplink_simulated_ldms2.part_1_1') + .collectFile(name:'plink_simulated_ldms.mgrm', newLine: true) + + ldms_grm_files = GCTA_MAKEGRMPART_LDMS1.out.grm_files + .mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files) + .map { meta, grm_id, grm_bin, grm_n_bin -> [grm_id, grm_bin, grm_n_bin] } + .collect() + .map { rows -> rows.flatten() } + + input[0] = GAWK_BIVARIATE_PHENO.out.output + input[1] = mgrm_file + .combine(ldms_grm_files) + .map { row -> [[ id:'plink_simulated_ldms' ], row[0], row[1..-1]] } + input[2] = GAWK_QUANTITATIVE_COVARIATES.out.output + input[3] = GAWK_CATEGORICAL_COVARIATES.out.output + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.bivariate_results.size() == 1 }, + { assert process.out.log_file.size() == 1 }, + { assert process.out.bivariate_results.get(0).get(0).id == "Trait1__Trait2" }, + { assert file(process.out.log_file.get(0).get(1)).name == "Trait1__Trait2.log" }, + { assert file(process.out.log_file.get(0).get(1)).exists() }, + { + def resultFile = file(process.out.bivariate_results.get(0).get(1)) + assert snapshot( + [ + [ + process.out.bivariate_results.get(0).get(0), + resultFile.name, + resultFile.readLines().collect { it.tokenize()[0] } + ] + ], + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - bivariate phenotype with ldms mgrm - stub") { + options "-stub" + config "./nextflow.config" + + when { + process { + """ + mgrm_file = Channel + .of('plink_simulated_ldms1.part_1_1\\nplink_simulated_ldms2.part_1_1') + .collectFile(name:'plink_simulated_ldms.mgrm', newLine: true) + + ldms_grm_files = GCTA_MAKEGRMPART_LDMS1.out.grm_files + .mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files) + .map { meta, grm_id, grm_bin, grm_n_bin -> [grm_id, grm_bin, grm_n_bin] } + .collect() + .map { rows -> rows.flatten() } + + 
input[0] = GAWK_BIVARIATE_PHENO.out.output + input[1] = mgrm_file + .combine(ldms_grm_files) + .map { row -> [[ id:'plink_simulated_ldms' ], row[0], row[1..-1]] } + input[2] = [[ id:'covariates_quant' ], []] + input[3] = [[ id:'covariates_cat' ], []] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test.snap b/modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test.snap new file mode 100644 index 000000000000..44d0b0b2ca71 --- /dev/null +++ b/modules/nf-core/gcta/bivariateremlldms/tests/main.nf.test.snap @@ -0,0 +1,107 @@ +{ + "homo_sapiens popgen - bivariate phenotype with ldms mgrm and covariates": { + "content": [ + [ + [ + { + "id": "Trait1__Trait2" + }, + "Trait1__Trait2.hsq", + [ + "Source", + "V(G1)_tr1", + "V(G1)_tr2", + "C(G1)_tr12", + "V(G2)_tr1", + "V(G2)_tr2", + "C(G2)_tr12", + "V(e)_tr1", + "V(e)_tr2", + "C(e)_tr12", + "Vp_tr1", + "Vp_tr2", + "V(G1)/Vp_tr1", + "V(G1)/Vp_tr2", + "V(G2)/Vp_tr1", + "V(G2)/Vp_tr2", + "rG1", + "rG2", + "logL", + "n" + ] + ] + ], + { + "versions_gcta": [ + [ + "GCTA_BIVARIATEREMLLDMS", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-14T22:25:42.183313239" + }, + "homo_sapiens popgen - bivariate phenotype with ldms mgrm - stub": { + "content": [ + { + "0": [ + [ + { + "id": "Trait1__Trait2" + }, + "Trait1__Trait2.hsq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "Trait1__Trait2" + }, + "Trait1__Trait2.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + "GCTA_BIVARIATEREMLLDMS", + "gcta", + "1.94.1" + ] + ], + "bivariate_results": [ + [ + { + "id": "Trait1__Trait2" + }, + "Trait1__Trait2.hsq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_file": [ + [ + { + "id": "Trait1__Trait2" + }, + "Trait1__Trait2.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + 
"versions_gcta": [ + [ + "GCTA_BIVARIATEREMLLDMS", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-14T22:25:54.732574761" + } +} \ No newline at end of file diff --git a/modules/nf-core/gcta/bivariateremlldms/tests/nextflow.config b/modules/nf-core/gcta/bivariateremlldms/tests/nextflow.config new file mode 100644 index 000000000000..de31e0218829 --- /dev/null +++ b/modules/nf-core/gcta/bivariateremlldms/tests/nextflow.config @@ -0,0 +1,3 @@ +params { + modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/" +} diff --git a/modules/nf-core/gcta/calculateldscores/environment.yml b/modules/nf-core/gcta/calculateldscores/environment.yml new file mode 100644 index 000000000000..c1556bf88999 --- /dev/null +++ b/modules/nf-core/gcta/calculateldscores/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gcta=1.94.1 + - conda-forge::r-base=4.3.1 diff --git a/modules/nf-core/gcta/calculateldscores/main.nf b/modules/nf-core/gcta/calculateldscores/main.nf new file mode 100644 index 000000000000..8ee1b4d57828 --- /dev/null +++ b/modules/nf-core/gcta/calculateldscores/main.nf @@ -0,0 +1,71 @@ +// Compute per-SNP LD scores with GCTA, then split the SNPs into four groups at the +// quartiles of `ldscore_SNP` (one `<meta.id>_snp_groupN.txt` file per group). +process GCTA_CALCULATELDSCORES { + tag "${meta.id}" + label 'process_medium' + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'docker://community.wave.seqera.io/library/gcta_r-base:31127c93877b38de' : + 'community.wave.seqera.io/library/gcta_r-base:31127c93877b38de' }" + + input: + tuple val(meta), path(bed), path(bim), path(fam) + val ld_score_region + + output: + tuple val(meta), path("${meta.id}_gcta_ld.score.ld"), emit: ld_scores + tuple val(meta), path("${meta.id}_snp_group1.txt"), path("${meta.id}_snp_group2.txt"), path("${meta.id}_snp_group3.txt"), path("${meta.id}_snp_group4.txt"), emit: snp_group_files + tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def extra_args = task.ext.args ?: '' + // --bfile needs the staged file-set prefix; meta.id is only a label and may differ. + def bfile_prefix = bed.baseName + + """ + set -euo pipefail + + gcta \\ + --bfile ${bfile_prefix} \\ + --ld-score-region ${ld_score_region} \\ + --out ${meta.id}_gcta_ld \\ + --thread-num ${task.cpus} ${extra_args} + + Rscript - ${meta.id}_gcta_ld.score.ld ${meta.id} <<'EOF' + args <- commandArgs(trailingOnly = TRUE) + filename <- args[1] + out_prefix <- args[2] + + lds_seg <- read.table( + filename, + header = TRUE, + colClasses = c("character", rep("numeric", 8)) + ) + + # summary() order: Min, 1st Qu., Median, Mean, 3rd Qu., Max -> [2], [3], [5] are the quartile cut points + quartiles <- summary(lds_seg\$ldscore_SNP) + + lb1 <- which(lds_seg\$ldscore_SNP <= quartiles[2]) + lb2 <- which(lds_seg\$ldscore_SNP > quartiles[2] & lds_seg\$ldscore_SNP <= quartiles[3]) + lb3 <- which(lds_seg\$ldscore_SNP > quartiles[3] & lds_seg\$ldscore_SNP <= quartiles[5]) + lb4 <- which(lds_seg\$ldscore_SNP > quartiles[5]) + + write.table(lds_seg\$SNP[lb1], paste(out_prefix, "snp_group1.txt", sep = "_"), row.names = FALSE, quote = FALSE, col.names = FALSE, append = TRUE) + write.table(lds_seg\$SNP[lb2], paste(out_prefix, "snp_group2.txt", sep = "_"), row.names = FALSE, quote = FALSE, col.names = FALSE, append = TRUE) + write.table(lds_seg\$SNP[lb3], paste(out_prefix, "snp_group3.txt", sep = "_"), row.names = FALSE, quote = FALSE, col.names = FALSE, append = TRUE) + 
write.table(lds_seg\$SNP[lb4], paste(out_prefix, "snp_group4.txt", sep = "_"), row.names = FALSE, quote = FALSE, col.names = FALSE, append = TRUE) + EOF + """ + + stub: + """ + printf "SNP\tA1\tA2\tFreq\tb\tse\tp\tldscore\n" > ${meta.id}_gcta_ld.score.ld + printf "stub_snp1\n" > ${meta.id}_snp_group1.txt + printf "stub_snp2\n" > ${meta.id}_snp_group2.txt + printf "stub_snp3\n" > ${meta.id}_snp_group3.txt + printf "stub_snp4\n" > ${meta.id}_snp_group4.txt + """ +} diff --git a/modules/nf-core/gcta/calculateldscores/meta.yml b/modules/nf-core/gcta/calculateldscores/meta.yml new file mode 100644 index 000000000000..666d1128895b --- /dev/null +++ b/modules/nf-core/gcta/calculateldscores/meta.yml @@ -0,0 +1,110 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "gcta_calculateldscores" +description: Calculate LD scores with GCTA and segment variants into LD-based SNP groups +keywords: + - gcta + - ld score + - ldms + - genetics +tools: + - "gcta": + description: "GCTA is a tool for genome-wide complex trait analysis." + homepage: "https://yanglab.westlake.edu.cn/software/gcta/" + documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf" + tool_dev_url: "https://github.com/jianyangqt/gcta" + licence: ["GPL-3.0-only"] +input: + - - meta: + type: map + description: | + Groovy Map containing PLINK1 genotype metadata + e.g. `[ id:'plink_simulated' ]` + - bed: + type: file + description: PLINK1 genotype primary file + pattern: "*.{bed}" + ontologies: + - edam: "http://edamontology.org/format_3003" + - bim: + type: file + description: PLINK1 variant metadata file + pattern: "*.{bim}" + ontologies: [] + - fam: + type: file + description: PLINK1 sample metadata file + pattern: "*.{fam}" + ontologies: [] + - ld_score_region: + type: integer + description: | + LD-score region width passed to `--ld-score-region`. 
+ Callers should pass `200` for the default GCTA region width unless they are intentionally overriding it. +output: + ld_scores: + - - meta: + type: map + description: | + Groovy Map containing PLINK1 genotype metadata + e.g. `[ id:'plink_simulated' ]` + - "${meta.id}_gcta_ld.score.ld": + type: file + description: GCTA LD score output file + pattern: "${meta.id}_gcta_ld.score.ld" + ontologies: + - edam: "http://edamontology.org/format_2330" + snp_group_files: + - - meta: + type: map + description: | + Groovy Map containing PLINK1 genotype metadata + e.g. `[ id:'plink_simulated' ]` + - "${meta.id}_snp_group1.txt": + type: file + description: First LD-derived SNP-group file + pattern: "${meta.id}_snp_group1.txt" + ontologies: + - edam: "http://edamontology.org/format_2330" + - "${meta.id}_snp_group2.txt": + type: file + description: Second LD-derived SNP-group file + pattern: "${meta.id}_snp_group2.txt" + ontologies: + - edam: "http://edamontology.org/format_2330" + - "${meta.id}_snp_group3.txt": + type: file + description: Third LD-derived SNP-group file + pattern: "${meta.id}_snp_group3.txt" + ontologies: + - edam: "http://edamontology.org/format_2330" + - "${meta.id}_snp_group4.txt": + type: file + description: Fourth LD-derived SNP-group file + pattern: "${meta.id}_snp_group4.txt" + ontologies: + - edam: "http://edamontology.org/format_2330" + versions_gcta: + - - "${task.process}": + type: string + description: The process the versions were collected from + - "gcta": + type: string + description: The tool name + - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'": + type: eval + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - gcta: + type: string + description: The tool name + - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//': + type: eval + description: 
The command used to generate the version of the tool +authors: + - "@andongni" +maintainers: + - "@andongni" diff --git a/modules/nf-core/gcta/calculateldscores/tests/main.nf.test b/modules/nf-core/gcta/calculateldscores/tests/main.nf.test new file mode 100644 index 000000000000..bd28c7a09e0d --- /dev/null +++ b/modules/nf-core/gcta/calculateldscores/tests/main.nf.test @@ -0,0 +1,76 @@ +nextflow_process { + + name "Test Process GCTA_CALCULATELDSCORES" + script "../main.nf" + process "GCTA_CALCULATELDSCORES" + + tag "modules" + tag "modules_nfcore" + tag "gcta" + tag "gcta/calculateldscores" + tag "plink" + + test("homo_sapiens gsmr - plink1") { + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'bfile' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.fam', checkIfExists: true) + ] + input[1] = 50 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.ld_scores.size() == 1 }, + { assert process.out.snp_group_files.size() == 1 }, + { assert process.out.ld_scores.get(0).get(0).id == 'bfile' }, + { + def snpGroups = process.out.snp_group_files.get(0) + assert snpGroups.get(0).id == 'bfile' + assert (1..4).every { idx -> path(snpGroups.get(idx)).exists() } + }, + { + assert snapshot( + process.out.ld_scores, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens gsmr - plink1 - stub") { + options "-stub" + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'bfile' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.bim', 
checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.fam', checkIfExists: true) + ] + input[1] = 200 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/gcta/calculateldscores/tests/main.nf.test.snap b/modules/nf-core/gcta/calculateldscores/tests/main.nf.test.snap new file mode 100644 index 000000000000..287c097cf2c8 --- /dev/null +++ b/modules/nf-core/gcta/calculateldscores/tests/main.nf.test.snap @@ -0,0 +1,91 @@ +{ + "homo_sapiens gsmr - plink1": { + "content": [ + [ + [ + { + "id": "bfile" + }, + "bfile_gcta_ld.score.ld:md5,ccfd5ff1898853f1c02e7572aaa335cf" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_CALCULATELDSCORES", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T21:45:20.766416151" + }, + "homo_sapiens gsmr - plink1 - stub": { + "content": [ + { + "0": [ + [ + { + "id": "bfile" + }, + "bfile_gcta_ld.score.ld:md5,1750d635cb5186c5b09e5e34515cd19d" + ] + ], + "1": [ + [ + { + "id": "bfile" + }, + "bfile_snp_group1.txt:md5,a48b3426cc201c1c0be8e11cf34e9365", + "bfile_snp_group2.txt:md5,6d0959c8d45c313c024c6d3fa5c8c630", + "bfile_snp_group3.txt:md5,931517470f35d95b31209c555bd159cf", + "bfile_snp_group4.txt:md5,cce7bf7b3cf0a016872193a1283153ab" + ] + ], + "2": [ + [ + "GCTA_CALCULATELDSCORES", + "gcta", + "1.94.1" + ] + ], + "ld_scores": [ + [ + { + "id": "bfile" + }, + "bfile_gcta_ld.score.ld:md5,1750d635cb5186c5b09e5e34515cd19d" + ] + ], + "snp_group_files": [ + [ + { + "id": "bfile" + }, + "bfile_snp_group1.txt:md5,a48b3426cc201c1c0be8e11cf34e9365", + "bfile_snp_group2.txt:md5,6d0959c8d45c313c024c6d3fa5c8c630", + "bfile_snp_group3.txt:md5,931517470f35d95b31209c555bd159cf", + "bfile_snp_group4.txt:md5,cce7bf7b3cf0a016872193a1283153ab" + ] + ], + "versions_gcta": [ + [ + "GCTA_CALCULATELDSCORES", + "gcta", + "1.94.1" + ] + ] + } 
+ ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T16:01:03.508961363" + } +} \ No newline at end of file diff --git a/modules/nf-core/gcta/calculateldscores/tests/nextflow.config b/modules/nf-core/gcta/calculateldscores/tests/nextflow.config new file mode 100644 index 000000000000..80c45aadd02f --- /dev/null +++ b/modules/nf-core/gcta/calculateldscores/tests/nextflow.config @@ -0,0 +1,9 @@ +params { + modules_testdata_base_path = System.getenv('NF_MODULES_TESTDATA_BASE_PATH') ?: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' +} + +process { + withName: "GCTA_CALCULATELDSCORES" { + cpus = 1 + } +} diff --git a/modules/nf-core/gcta/fastgwa/environment.yml b/modules/nf-core/gcta/fastgwa/environment.yml new file mode 100644 index 000000000000..3e22ea7b9f20 --- /dev/null +++ b/modules/nf-core/gcta/fastgwa/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gcta=1.94.1 diff --git a/modules/nf-core/gcta/fastgwa/main.nf b/modules/nf-core/gcta/fastgwa/main.nf new file mode 100644 index 000000000000..be40a6309be5 --- /dev/null +++ b/modules/nf-core/gcta/fastgwa/main.nf @@ -0,0 +1,60 @@ +// Run GCTA fastGWA: `--fastGWA-lr` when `meta3.is_binary` is truthy, otherwise +// `--fastGWA-mlm` (or `--fastGWA-mlm-exact` when `mlm_exact` is set) with the sparse GRM. +process GCTA_FASTGWA { + tag "${meta.id}:${meta3.id}" + label 'process_medium' + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' : + 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }" + + input: + tuple val(meta), path(bed_pgen), path(bim_pvar), path(fam_psam) + tuple val(meta2), path(sparse_grm_id), path(sparse_grm_sp) + tuple val(meta3), path(phenotype_file) + tuple val(meta4), path(quant_covariates_file) + tuple val(meta5), path(cat_covariates_file) + val mlm_exact + + output: + tuple val(meta), val(meta3), path("${meta.id}_${meta3.id}.fastGWA"), emit: results + tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def qcovar_arg = quant_covariates_file ? "--qcovar ${quant_covariates_file}" : '' + def covar_arg = cat_covariates_file ? "--covar ${cat_covariates_file}" : '' + def mpheno_arg = meta3.mpheno ? "--mpheno ${meta3.mpheno}" : '' + // GCTA resolves genotype and GRM inputs by file-set prefix, so derive the prefixes from the + // staged file names; meta ids are labels and need not match the files on disk. + def sparse_grm_prefix = sparse_grm_id ? sparse_grm_id.name.replaceFirst(/\.grm\.id$/, '') : '' + def grm_arg = meta3.is_binary ? '' : "--grm-sparse ${sparse_grm_prefix}" + def genotype_suffix = bed_pgen.name.tokenize('.').last() + def genotype_flag = genotype_suffix == 'pgen' ? '--pfile' : '--bfile' + def genotype_prefix = bed_pgen.baseName + def out = "${meta.id}_${meta3.id}" + def extra_args = task.ext.args ?: '' + def mode_arg = meta3.is_binary ? '--fastGWA-lr' : (mlm_exact ? 
'--fastGWA-mlm-exact' : '--fastGWA-mlm') + + """ + set -euo pipefail + + gcta \\ + ${genotype_flag} ${genotype_prefix} \\ + ${grm_arg} \\ + ${mode_arg} \\ + --pheno ${phenotype_file} \\ + ${qcovar_arg} \\ + ${covar_arg} \\ + ${mpheno_arg} \\ + --thread-num ${task.cpus} \\ + --out ${out} ${extra_args} + """ + + stub: + """ + touch ${meta.id}_${meta3.id}.fastGWA + """ +} diff --git a/modules/nf-core/gcta/fastgwa/meta.yml b/modules/nf-core/gcta/fastgwa/meta.yml new file mode 100644 index 000000000000..768eb1ec3189 --- /dev/null +++ b/modules/nf-core/gcta/fastgwa/meta.yml @@ -0,0 +1,135 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "gcta_fastgwa" +description: Run GCTA fastGWA-MLM with PLINK genotype inputs and a sparse GRM +keywords: + - gcta + - fastgwa + - gwas + - genetics +tools: + - "gcta": + description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data." + homepage: "https://yanglab.westlake.edu.cn/software/gcta/" + documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf" + tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/" +input: + - - meta: + type: map + description: | + Groovy map containing PLINK genotype metadata + e.g. `[ id:'plink_simulated' ]` + - bed_pgen: + type: file + description: PLINK primary genotype file, either `.bed` or `.pgen` + pattern: "*.{bed,pgen}" + ontologies: + - edam: "http://edamontology.org/format_3003" + - bim_pvar: + type: file + description: PLINK sidecar file, either `.bim` or `.pvar` + pattern: "*.{bim,pvar}" + ontologies: [] + - fam_psam: + type: file + description: PLINK sidecar file, either `.fam` or `.psam` + pattern: "*.{fam,psam}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy map containing sparse GRM metadata + e.g. 
`[ id:'plink_simulated_sp' ]` + Used for non-binary fastGWA MLM modes and ignored when `meta3.is_binary` is true + - sparse_grm_id: + type: file + description: Sparse GRM ID file (`.grm.id`), ignored when `meta3.is_binary` is true + pattern: "*.grm.id" + ontologies: [] + - sparse_grm_sp: + type: file + description: Sparse GRM sparse matrix file (`.grm.sp`), ignored when `meta3.is_binary` is true + pattern: "*.grm.sp" + ontologies: [] + - - meta3: + type: map + description: | + Groovy map containing phenotype metadata + e.g. `[ id:'QuantitativeTrait', is_binary:false ]` + `meta3.is_binary` is required and selects logistic vs MLM fastGWA mode + Optional phenotype selector may be supplied as `meta3.mpheno` + - phenotype_file: + type: file + description: Phenotype file + pattern: "*.{phe,pheno,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" + - - meta4: + type: map + description: | + Groovy map containing quantitative covariate metadata + e.g. `[ id:'covariates_quant' ]` + - quant_covariates_file: + type: file + description: Quantitative covariates file, pass `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" + - - meta5: + type: map + description: | + Groovy map containing categorical covariate metadata + e.g. `[ id:'covariates_cat' ]` + - cat_covariates_file: + type: file + description: Categorical covariates file, pass `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" + - mlm_exact: + type: boolean + description: | + Apply `--fastGWA-mlm-exact` for non-binary phenotypes. + Ignored when `meta3.is_binary` is true because binary phenotypes use `--fastGWA-lr`. +output: + results: + - - meta: + type: map + description: | + Groovy map containing PLINK genotype metadata + e.g. `[ id:'plink_simulated' ]` + - meta3: + type: map + description: | + Groovy map containing phenotype metadata + e.g. 
`[ id:'QuantitativeTrait' ]` + - "${meta.id}_${meta3.id}.fastGWA": + type: file + description: FastGWA association results + pattern: "${meta.id}_${meta3.id}.fastGWA" + ontologies: + - edam: "http://edamontology.org/format_2330" + versions_gcta: + - - "${task.process}": + type: string + description: The process the version was collected from + - "gcta": + type: string + description: The tool name + - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'": + type: eval + description: The command used to retrieve the GCTA version +topics: + versions: + - - ${task.process}: + type: string + description: The process the version was collected from + - gcta: + type: string + description: The tool name + - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//': + type: eval + description: The command used to retrieve the GCTA version +authors: + - "@andongni" +maintainers: + - "@andongni" diff --git a/modules/nf-core/gcta/fastgwa/tests/main.nf.test b/modules/nf-core/gcta/fastgwa/tests/main.nf.test new file mode 100644 index 000000000000..6889d8a002d1 --- /dev/null +++ b/modules/nf-core/gcta/fastgwa/tests/main.nf.test @@ -0,0 +1,237 @@ +nextflow_process { + + name "Test Process GCTA_FASTGWA" + script "../main.nf" + process "GCTA_FASTGWA" + + tag "modules" + tag "modules_nfcore" + tag "gcta" + tag "gcta/fastgwa" + tag "gcta/makegrmpart" + tag "gcta/makebksparse" + tag "gawk" + config "./nextflow.config" + + setup { + run("GAWK", alias: "GAWK_QUANTITATIVE_PHENO") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'QuantitativeTrait' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3 }').collectFile(name:'quantitative_phenotype.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_BINARY_PHENO") { + script 
"../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'BinaryTrait' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_binary_phenoname.phe', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3 }').collectFile(name:'binary_phenotype.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_QUANTITATIVE_COVARIATES") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'covariates_quant' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$4, \$5, \$6 }').collectFile(name:'quantitative_covariates.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_CATEGORICAL_COVARIATES") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'covariates_cat' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3 }').collectFile(name:'categorical_covariates.awk') + input[2] = false + """ + } + } + + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_DENSE") { + script "../../makegrmpart/main.nf" + process { + """ + file('plink_simulated.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'plink_simulated_dense', part_gcta_job:1, nparts_gcta:1 ], + file('plink_simulated.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + input[1] = [[ id:'all_variants' ], []] + """ + } + } + + run("GCTA_MAKEBKSPARSE", alias: 
"GCTA_MAKEBKSPARSE_DENSE") { + script "../../makebksparse/main.nf" + process { + """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = dense_grm + input[1] = Channel.value(0.05) + """ + } + } + } + + test("homo_sapiens popgen - plink1 with sparse GRM and quantitative phenotype") { + when { + process { + """ + sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_id, sparse_grm_sp -> + [[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp] + } + + quantitative_pheno = GAWK_QUANTITATIVE_PHENO.out.output.map { meta, phenotype_file -> + [[ id:meta.id, is_binary:false ], phenotype_file] + } + + input[0] = [ + [ id:"plink_simulated" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bed", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bim", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.fam", checkIfExists: true) + ] + input[1] = sparse_grm + input[2] = quantitative_pheno + input[3] = GAWK_QUANTITATIVE_COVARIATES.out.output + input[4] = GAWK_CATEGORICAL_COVARIATES.out.output + input[5] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.results.size() == 1 }, + { assert process.out.results.get(0).get(0).id == "plink_simulated" }, + { assert process.out.results.get(0).get(1).id == "QuantitativeTrait" }, + { assert path(process.out.results.get(0).get(2)).fileName.toString() == "plink_simulated_QuantitativeTrait.fastGWA" }, + { + assert snapshot( + process.out.results, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - plink1 with sparse GRM and binary phenotype") { + when { + 
process { + """ + sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_id, sparse_grm_sp -> + [[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp] + } + + binary_pheno = GAWK_BINARY_PHENO.out.output.map { meta, phenotype_file -> + [[ id:meta.id, is_binary:true ], phenotype_file] + } + + input[0] = [ + [ id:"plink_simulated" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bed", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bim", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.fam", checkIfExists: true) + ] + input[1] = sparse_grm + input[2] = binary_pheno + input[3] = GAWK_QUANTITATIVE_COVARIATES.out.output + input[4] = GAWK_CATEGORICAL_COVARIATES.out.output + input[5] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.results.size() == 1 }, + { assert process.out.results.get(0).get(0).id == "plink_simulated" }, + { assert process.out.results.get(0).get(1).id == "BinaryTrait" }, + { assert path(process.out.results.get(0).get(2)).fileName.toString() == "plink_simulated_BinaryTrait.fastGWA" }, + { + assert snapshot( + process.out.results, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - plink2 with sparse GRM - stub") { + options "-stub" + + when { + process { + """ + sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_id, sparse_grm_sp -> + [[ id:meta.id + '_sp' ], sparse_grm_id, sparse_grm_sp] + } + + quantitative_pheno = GAWK_QUANTITATIVE_PHENO.out.output.map { meta, phenotype_file -> + [[ id:meta.id, is_binary:false ], phenotype_file] + } + + input[0] = [ + [ id:"plink_simulated" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.pgen", checkIfExists: true), + 
file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.pvar", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.psam", checkIfExists: true) + ] + input[1] = sparse_grm + input[2] = quantitative_pheno + input[3] = [[ id:'covariates_quant' ], []] + input[4] = [[ id:'covariates_cat' ], []] + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap b/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap new file mode 100644 index 000000000000..b3bd73e97fb5 --- /dev/null +++ b/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap @@ -0,0 +1,111 @@ +{ + "homo_sapiens popgen - plink2 with sparse GRM - stub": { + "content": [ + { + "0": [ + [ + { + "id": "plink_simulated" + }, + { + "id": "QuantitativeTrait", + "is_binary": false + }, + "plink_simulated_QuantitativeTrait.fastGWA:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "GCTA_FASTGWA", + "gcta", + "1.94.1" + ] + ], + "results": [ + [ + { + "id": "plink_simulated" + }, + { + "id": "QuantitativeTrait", + "is_binary": false + }, + "plink_simulated_QuantitativeTrait.fastGWA:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_gcta": [ + [ + "GCTA_FASTGWA", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T18:15:29.679228048" + }, + "homo_sapiens popgen - plink1 with sparse GRM and quantitative phenotype": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + { + "id": "QuantitativeTrait", + "is_binary": false + }, + "plink_simulated_QuantitativeTrait.fastGWA:md5,d9190e07273a3de2a15a6e7053aed487" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_FASTGWA", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": 
"2026-03-18T19:01:13.458535259" + }, + "homo_sapiens popgen - plink1 with sparse GRM and binary phenotype": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + { + "id": "BinaryTrait", + "is_binary": true + }, + "plink_simulated_BinaryTrait.fastGWA:md5,723602dcb94b8a08b3652f1491dcd2ee" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_FASTGWA", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T18:15:22.74128729" + } +} \ No newline at end of file diff --git a/modules/nf-core/gcta/fastgwa/tests/nextflow.config b/modules/nf-core/gcta/fastgwa/tests/nextflow.config new file mode 100644 index 000000000000..de31e0218829 --- /dev/null +++ b/modules/nf-core/gcta/fastgwa/tests/nextflow.config @@ -0,0 +1,3 @@ +params { + modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/" +} diff --git a/modules/nf-core/gcta/filtergrmwithkeep/environment.yml b/modules/nf-core/gcta/filtergrmwithkeep/environment.yml new file mode 100644 index 000000000000..3e22ea7b9f20 --- /dev/null +++ b/modules/nf-core/gcta/filtergrmwithkeep/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gcta=1.94.1 diff --git a/modules/nf-core/gcta/filtergrmwithkeep/main.nf b/modules/nf-core/gcta/filtergrmwithkeep/main.nf new file mode 100644 index 000000000000..f0afdf932e7d --- /dev/null +++ b/modules/nf-core/gcta/filtergrmwithkeep/main.nf @@ -0,0 +1,39 @@ +process GCTA_FILTERGRMWITHKEEP { + tag "${meta.id}" + label 'process_medium' + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' : + 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }" + + input: + tuple val(meta), path(grm_id), path(grm_bin), path(grm_n_bin) + tuple val(meta2), path(keep_file) + + output: + tuple val(meta), path("${meta.id}_unrel.grm.id"), path("${meta.id}_unrel.grm.bin"), path("${meta.id}_unrel.grm.N.bin"), emit: filtered_grm + tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + gcta \\ + --grm ${meta.id} \\ + --keep ${keep_file} \\ + --make-grm \\ + --out ${meta.id}_unrel \\ + --thread-num ${task.cpus} \\ + ${args} + """ + + stub: + """ + touch ${meta.id}_unrel.grm.id + touch ${meta.id}_unrel.grm.bin + touch ${meta.id}_unrel.grm.N.bin + """ +} diff --git a/modules/nf-core/gcta/filtergrmwithkeep/meta.yml b/modules/nf-core/gcta/filtergrmwithkeep/meta.yml new file mode 100644 index 000000000000..88788f8a6de1 --- /dev/null +++ b/modules/nf-core/gcta/filtergrmwithkeep/meta.yml @@ -0,0 +1,96 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "gcta_filtergrmwithkeep" +description: Filter a dense GRM to the individuals listed in a keep file +keywords: + - gcta + - grm + - genetics +tools: + - "gcta": + description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data." + homepage: "https://yanglab.westlake.edu.cn/software/gcta/" + documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf" + tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/" + +input: + - - meta: + type: map + description: | + Groovy map containing dense GRM metadata + e.g. 
`[ id:'plink_simulated' ]` + - grm_id: + type: file + description: Dense GRM sample identifier file + pattern: "*.grm.id" + ontologies: [] + - grm_bin: + type: file + description: Dense GRM binary matrix file + pattern: "*.grm.bin" + ontologies: [] + - grm_n_bin: + type: file + description: Dense GRM sample-count matrix file + pattern: "*.grm.N.bin" + ontologies: [] + - - meta2: + type: map + description: | + Groovy map containing keep-file metadata + e.g. `[ id:'plink_simulated_keep' ]` + - keep_file: + type: file + description: Keep file listing the individuals to retain + pattern: "*.{keep,txt,id}" + ontologies: + - edam: "http://edamontology.org/format_2330" + +output: + filtered_grm: + - - meta: + type: map + description: | + Groovy map containing dense GRM metadata + e.g. `[ id:'plink_simulated' ]` + - "${meta.id}_unrel.grm.id": + type: file + description: Filtered GRM sample identifier file + pattern: "${meta.id}_unrel.grm.id" + ontologies: [] + - "${meta.id}_unrel.grm.bin": + type: file + description: Filtered GRM binary matrix file + pattern: "${meta.id}_unrel.grm.bin" + ontologies: [] + - "${meta.id}_unrel.grm.N.bin": + type: file + description: Filtered GRM sample-count matrix file + pattern: "${meta.id}_unrel.grm.N.bin" + ontologies: [] + versions_gcta: + - - "${task.process}": + type: string + description: The process the version was collected from + - "gcta": + type: string + description: The tool name + - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'": + type: eval + description: The command used to retrieve the GCTA version + +topics: + versions: + - - ${task.process}: + type: string + description: The process the version was collected from + - gcta: + type: string + description: The tool name + - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//': + type: eval + description: The command used to retrieve the GCTA version + +authors: + - "@andongni" +maintainers: + - "@andongni" diff 
--git a/modules/nf-core/gcta/filtergrmwithkeep/tests/main.nf.test b/modules/nf-core/gcta/filtergrmwithkeep/tests/main.nf.test new file mode 100644 index 000000000000..53de4d1bab0c --- /dev/null +++ b/modules/nf-core/gcta/filtergrmwithkeep/tests/main.nf.test @@ -0,0 +1,111 @@ +nextflow_process { + + name "Test Process GCTA_FILTERGRMWITHKEEP" + script "../main.nf" + process "GCTA_FILTERGRMWITHKEEP" + + tag "modules" + tag "modules_nfcore" + tag "gcta" + tag "gcta/filtergrmwithkeep" + tag "gcta/makegrmpart" + tag "gcta/removerelatedsubjects" + + setup { + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_DENSE") { + script "../../makegrmpart/main.nf" + process { + """ + file('plink_simulated.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'plink_simulated_dense', part_gcta_job:1, nparts_gcta:1 ], + file('plink_simulated.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + input[1] = [[ id:'all_variants' ], []] + """ + } + } + + run("GCTA_REMOVERELATEDSUBJECTS", alias: "GCTA_REMOVERELATEDSUBJECTS_KEEP") { + script "../../removerelatedsubjects/main.nf" + process { + """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = dense_grm + """ + } + } + } + + test("homo_sapiens popgen - filter dense GRM with keep file") { + config "./nextflow.config" + + when { + process { + """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + 
[[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = dense_grm + input[1] = GCTA_REMOVERELATEDSUBJECTS_KEEP.out.keep_file + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.filtered_grm.size() == 1 }, + { assert process.out.filtered_grm.get(0).get(0).id == "plink_simulated_dense.part_1_1" }, + { + assert snapshot( + process.out.filtered_grm, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - filter dense GRM with keep file - stub") { + options "-stub" + config "./nextflow.config" + + when { + process { + """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = dense_grm + input[1] = GCTA_REMOVERELATEDSUBJECTS_KEEP.out.keep_file + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/gcta/filtergrmwithkeep/tests/main.nf.test.snap b/modules/nf-core/gcta/filtergrmwithkeep/tests/main.nf.test.snap new file mode 100644 index 000000000000..949d1ab7da03 --- /dev/null +++ b/modules/nf-core/gcta/filtergrmwithkeep/tests/main.nf.test.snap @@ -0,0 +1,75 @@ +{ + "homo_sapiens popgen - filter dense GRM with keep file": { + "content": [ + [ + [ + { + "id": "plink_simulated_dense.part_1_1" + }, + "plink_simulated_dense.part_1_1_unrel.grm.id:md5,ca8c0bded6951fdd3bf0dddc97b6df6b", + "plink_simulated_dense.part_1_1_unrel.grm.bin:md5,b1f124463eecbae86840a6651eec372d", + "plink_simulated_dense.part_1_1_unrel.grm.N.bin:md5,06b73ea8bae8f1e5f5d4de33dbd2c75e" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_FILTERGRMWITHKEEP", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T15:36:03.137610597" + }, + "homo_sapiens 
popgen - filter dense GRM with keep file - stub": { + "content": [ + { + "0": [ + [ + { + "id": "plink_simulated_dense.part_1_1" + }, + "plink_simulated_dense.part_1_1_unrel.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_dense.part_1_1_unrel.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_dense.part_1_1_unrel.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "GCTA_FILTERGRMWITHKEEP", + "gcta", + "1.94.1" + ] + ], + "filtered_grm": [ + [ + { + "id": "plink_simulated_dense.part_1_1" + }, + "plink_simulated_dense.part_1_1_unrel.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_dense.part_1_1_unrel.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_dense.part_1_1_unrel.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_gcta": [ + [ + "GCTA_FILTERGRMWITHKEEP", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T15:36:09.583271039" + } +} \ No newline at end of file diff --git a/modules/nf-core/gcta/filtergrmwithkeep/tests/nextflow.config b/modules/nf-core/gcta/filtergrmwithkeep/tests/nextflow.config new file mode 100644 index 000000000000..de31e0218829 --- /dev/null +++ b/modules/nf-core/gcta/filtergrmwithkeep/tests/nextflow.config @@ -0,0 +1,3 @@ +params { + modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/" +} diff --git a/modules/nf-core/gcta/makebksparse/environment.yml b/modules/nf-core/gcta/makebksparse/environment.yml new file mode 100644 index 000000000000..3e22ea7b9f20 --- /dev/null +++ b/modules/nf-core/gcta/makebksparse/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gcta=1.94.1 diff --git 
a/modules/nf-core/gcta/makebksparse/main.nf b/modules/nf-core/gcta/makebksparse/main.nf new file mode 100644 index 000000000000..7accab6b7c94 --- /dev/null +++ b/modules/nf-core/gcta/makebksparse/main.nf @@ -0,0 +1,37 @@ +process GCTA_MAKEBKSPARSE { + tag "${meta.id}" + label 'process_medium' + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' : + 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }" + + input: + tuple val(meta), path(grm_id), path(grm_bin), path(grm_n_bin) + val(cutoff) + + output: + tuple val(meta), path("${meta.id}_sp.grm.id"), path("${meta.id}_sp.grm.sp"), emit: sparse_grm_files + tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + gcta \\ + --grm ${meta.id} \\ + --make-bK-sparse ${cutoff} \\ + --out ${meta.id}_sp \\ + --thread-num ${task.cpus} \\ + ${args} + """ + + stub: + """ + touch ${meta.id}_sp.grm.id + touch ${meta.id}_sp.grm.sp + """ +} diff --git a/modules/nf-core/gcta/makebksparse/meta.yml b/modules/nf-core/gcta/makebksparse/meta.yml new file mode 100644 index 000000000000..c619ab38ede6 --- /dev/null +++ b/modules/nf-core/gcta/makebksparse/meta.yml @@ -0,0 +1,84 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "gcta_makebksparse" +description: Create a sparse GRM from a dense GRM for downstream fastGWA analyses +keywords: + - gcta + - grm + - sparse + - genetics +tools: + - "gcta": + description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data." 
+ homepage: "https://yanglab.westlake.edu.cn/software/gcta/" + documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf" + tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/" + +input: + - - meta: + type: map + description: | + Groovy map containing dense GRM metadata + e.g. `[ id:'plink_simulated' ]` + - grm_id: + type: file + description: Dense GRM sample identifier file + pattern: "*.grm.id" + ontologies: [] + - grm_bin: + type: file + description: Dense GRM binary matrix file + pattern: "*.grm.bin" + ontologies: [] + - grm_n_bin: + type: file + description: Dense GRM sample-count matrix file + pattern: "*.grm.N.bin" + ontologies: [] + - cutoff: + type: float + description: Sparse GRM cutoff passed to `--make-bK-sparse` + +output: + sparse_grm_files: + - - meta: + type: map + description: | + Groovy map containing dense GRM metadata + e.g. `[ id:'plink_simulated' ]` + - "${meta.id}_sp.grm.id": + type: file + description: Sparse GRM sample identifier file + pattern: "${meta.id}_sp.grm.id" + ontologies: [] + - "${meta.id}_sp.grm.sp": + type: file + description: Sparse GRM matrix file + pattern: "${meta.id}_sp.grm.sp" + ontologies: [] + versions_gcta: + - - "${task.process}": + type: string + description: The process the version was collected from + - "gcta": + type: string + description: The tool name + - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'": + type: eval + description: The command used to retrieve the GCTA version + +topics: + versions: + - - ${task.process}: + type: string + description: The process the version was collected from + - gcta: + type: string + description: The tool name + - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//': + type: eval + description: The command used to retrieve the GCTA version + +authors: + - "@andongni" +maintainers: + - "@andongni" diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test 
b/modules/nf-core/gcta/makebksparse/tests/main.nf.test new file mode 100644 index 000000000000..37a3a78367ff --- /dev/null +++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test @@ -0,0 +1,96 @@ +nextflow_process { + + name "Test Process GCTA_MAKEBKSPARSE" + script "../main.nf" + process "GCTA_MAKEBKSPARSE" + + tag "modules" + tag "modules_nfcore" + tag "gcta" + tag "gcta/makebksparse" + tag "gcta/makegrmpart" + + setup { + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_DENSE") { + script "../../makegrmpart/main.nf" + process { + """ + file('plink_simulated.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'plink_simulated_dense', part_gcta_job:1, nparts_gcta:1 ], + file('plink_simulated.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + input[1] = [[ id:'all_variants' ], []] + """ + } + } + } + + test("homo_sapiens popgen - create sparse GRM") { + config "./nextflow.config" + + when { + process { + """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = dense_grm + input[1] = Channel.value(0.05) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.sparse_grm_files.size() == 1 }, + { assert process.out.sparse_grm_files.get(0).get(0).id == "plink_simulated_dense.part_1_1" }, + { + assert snapshot( + process.out.sparse_grm_files, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - create sparse GRM - stub") { + options "-stub" 
+ config "./nextflow.config" + + when { + process { + """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = dense_grm + input[1] = Channel.value(0.05) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap new file mode 100644 index 000000000000..42828835a289 --- /dev/null +++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "homo_sapiens popgen - create sparse GRM": { + "content": [ + [ + [ + { + "id": "plink_simulated_dense.part_1_1" + }, + "plink_simulated_dense.part_1_1_sp.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9", + "plink_simulated_dense.part_1_1_sp.grm.sp:md5,1b78fe4b14c8690943d7687dd22ba85a" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_MAKEBKSPARSE", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T15:36:15.829559344" + }, + "homo_sapiens popgen - create sparse GRM - stub": { + "content": [ + { + "0": [ + [ + { + "id": "plink_simulated_dense.part_1_1" + }, + "plink_simulated_dense.part_1_1_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_dense.part_1_1_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "GCTA_MAKEBKSPARSE", + "gcta", + "1.94.1" + ] + ], + "sparse_grm_files": [ + [ + { + "id": "plink_simulated_dense.part_1_1" + }, + "plink_simulated_dense.part_1_1_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_dense.part_1_1_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_gcta": [ + [ + "GCTA_MAKEBKSPARSE", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + 
"nextflow": "25.10.4"
+        },
+        "timestamp": "2026-03-13T15:36:22.049066879"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/gcta/makebksparse/tests/nextflow.config b/modules/nf-core/gcta/makebksparse/tests/nextflow.config
new file mode 100644
index 000000000000..de31e0218829
--- /dev/null
+++ b/modules/nf-core/gcta/makebksparse/tests/nextflow.config
@@ -0,0 +1,3 @@
+params {
+    modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/"
+}
diff --git a/modules/nf-core/gcta/makegrmpart/environment.yml b/modules/nf-core/gcta/makegrmpart/environment.yml
new file mode 100644
index 000000000000..3e22ea7b9f20
--- /dev/null
+++ b/modules/nf-core/gcta/makegrmpart/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::gcta=1.94.1
diff --git a/modules/nf-core/gcta/makegrmpart/main.nf b/modules/nf-core/gcta/makegrmpart/main.nf
new file mode 100644
index 000000000000..f3398ee515db
--- /dev/null
+++ b/modules/nf-core/gcta/makegrmpart/main.nf
@@ -0,0 +1,53 @@
+// Computes one partition of a GCTA GRM (--make-grm-part) from a multi-file PLINK manifest
+process GCTA_MAKEGRMPART {
+    tag "part ${meta.part_gcta_job} of ${meta.nparts_gcta} (${meta.id})"
+    label 'process_medium'
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' :
+        'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }"
+
+    input:
+    // Manifest plus the PLINK files it lists; bim/pvar and fam/psam are staged only and never named on the command line
+    tuple val(meta), path(mfile), path(bed_pgen), path(bim_pvar), path(fam_psam)
+    // SNP list for --extract; an empty value ([]) leaves the flag out
+    tuple val(meta2), path(snp_group_file)
+
+    output:
+    tuple val(meta), path("${meta.id}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.id"), path("${meta.id}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.bin"), path("${meta.id}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.N.bin"), emit: grm_files
+    tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def part_gcta_job = meta.part_gcta_job
+    def nparts_gcta = meta.nparts_gcta
+    def extract_cmd = snp_group_file ? "--extract ${snp_group_file}" : ''
+    // Keep the 0.01 MAF default unless ext.args already carries its own --maf, so the flag is never passed twice
+    def maf_cmd = args.tokenize().contains('--maf') ? '' : '--maf 0.01'
+    // bed_pgen may arrive as one path or a list; the first file's extension selects --mpfile (pgen) or --mbfile
+    def genotype_files = bed_pgen instanceof List ? bed_pgen : [bed_pgen]
+    def genotype_extension = genotype_files[0].name.tokenize('.').last()
+    def multi_file_flag = genotype_extension == 'pgen' ? '--mpfile' : '--mbfile'
+
+    """
+    set -euo pipefail
+
+    gcta \\
+        ${multi_file_flag} ${mfile} \\
+        --make-grm-part ${nparts_gcta} ${part_gcta_job} \\
+        ${extract_cmd} \\
+        ${maf_cmd} \\
+        --thread-num ${task.cpus} \\
+        --out ${meta.id} ${args}
+    """
+
+    stub:
+    """
+    touch ${meta.id}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.id
+    touch ${meta.id}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.bin
+    touch ${meta.id}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.N.bin
+    """
+}
diff --git a/modules/nf-core/gcta/makegrmpart/meta.yml b/modules/nf-core/gcta/makegrmpart/meta.yml
new file mode 100644
index 000000000000..c29dfb89a8bc
--- /dev/null
+++ b/modules/nf-core/gcta/makegrmpart/meta.yml
@@ -0,0 +1,100 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "gcta_makegrmpart"
+description: Compute one partition of a GCTA genetic relationship matrix
+keywords:
+  - gcta
+  - grm
+  - genetics
+tools:
+  - "gcta":
+      description: "GCTA is a tool for genome-wide complex trait analysis."
+      homepage: "https://yanglab.westlake.edu.cn/software/gcta/"
+      documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf"
+      tool_dev_url: "https://github.com/jianyangqt/gcta"
+      licence: ["GPL-3.0-only"]
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing GRM-partition job metadata
+          e.g. 
`[ id:'gcta_grm', part_gcta_job:1, nparts_gcta:2 ]` + - mfile: + type: file + description: GCTA multi-input manifest consumed by `--mbfile` or `--mpfile` + pattern: "*.{mbfile,mpfile,txt}" + ontologies: + - edam: "http://edamontology.org/format_2330" + - bed_pgen: + type: file + description: Collection of PLINK primary genotype files referenced by the multi-input manifest + pattern: "*.{bed,pgen}" + ontologies: + - edam: "http://edamontology.org/format_3003" + - bim_pvar: + type: file + description: Collection of PLINK variant metadata files referenced by the multi-input manifest + pattern: "*.{bim,pvar}" + ontologies: [] + - fam_psam: + type: file + description: Collection of PLINK sample metadata files referenced by the multi-input manifest + pattern: "*.{fam,psam}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing SNP-selection metadata + e.g. `[ id:'snp_group1', snp_group:1 ]` + - snp_group_file: + type: file + description: Optional SNP extraction file passed to `--extract`; provide `[]` when absent + pattern: "*.{txt,list}" + ontologies: + - edam: "http://edamontology.org/format_2330" +output: + grm_files: + - - meta: + type: map + description: | + Groovy Map containing GRM-partition job metadata + e.g. 
`[ id:'gcta_grm', part_gcta_job:1, nparts_gcta:2 ]` + - "${meta.id}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.id": + type: file + description: Partitioned GRM ID file + pattern: "${meta.id}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.id" + ontologies: [] + - "${meta.id}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.bin": + type: file + description: Partitioned GRM binary matrix file + pattern: "${meta.id}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.bin" + ontologies: [] + - "${meta.id}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.N.bin": + type: file + description: Partitioned GRM sample-count matrix file + pattern: "${meta.id}.part_${meta.nparts_gcta}_${meta.part_gcta_job}.grm.N.bin" + ontologies: [] + versions_gcta: + - - "${task.process}": + type: string + description: The process the versions were collected from + - "gcta": + type: string + description: The tool name + - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'": + type: eval + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - gcta: + type: string + description: The tool name + - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//': + type: eval + description: The command used to generate the version of the tool +authors: + - "@andongni" +maintainers: + - "@andongni" diff --git a/modules/nf-core/gcta/makegrmpart/tests/main.nf.test b/modules/nf-core/gcta/makegrmpart/tests/main.nf.test new file mode 100644 index 000000000000..e8bf773bdf82 --- /dev/null +++ b/modules/nf-core/gcta/makegrmpart/tests/main.nf.test @@ -0,0 +1,87 @@ +nextflow_process { + + name "Test Process GCTA_MAKEGRMPART" + script "../main.nf" + process "GCTA_MAKEGRMPART" + + tag "modules" + tag "modules_nfcore" + tag "gcta" + tag "gcta/makegrmpart" + + test("homo_sapiens popgen - plink2") { + config 
"./nextflow.config" + + when { + process { + """ + file('gcta_grm.mpfile').text = 'plink_simulated plink_simulated.pgen plink_simulated.psam plink_simulated.pvar\\n' + + input[0] = [ + [ id:'gcta_grm', part_gcta_job:1, nparts_gcta:2 ], + file('gcta_grm.mpfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pgen', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pvar', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.psam', checkIfExists: true) + ] + ] + input[1] = [[ id:'snp_group0' ], []] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.grm_files.size() == 1 }, + { assert process.out.grm_files.get(0).get(0).id == 'gcta_grm' }, + { + assert snapshot( + process.out.grm_files, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - plink1 - stub") { + options "-stub" + config "./nextflow.config" + + when { + process { + """ + file('gcta_grm.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'gcta_grm_bed', part_gcta_job:1, nparts_gcta:2 ], + file('gcta_grm.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + input[1] = [[ id:'snp_group0' ], []] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/gcta/makegrmpart/tests/main.nf.test.snap b/modules/nf-core/gcta/makegrmpart/tests/main.nf.test.snap new file mode 100644 index 
000000000000..0ba4cfbf26bc --- /dev/null +++ b/modules/nf-core/gcta/makegrmpart/tests/main.nf.test.snap @@ -0,0 +1,81 @@ +{ + "homo_sapiens popgen - plink2": { + "content": [ + [ + [ + { + "id": "gcta_grm", + "part_gcta_job": 1, + "nparts_gcta": 2 + }, + "gcta_grm.part_2_1.grm.id:md5,9c193413bbf336213da941abeee78718", + "gcta_grm.part_2_1.grm.bin:md5,b683a1daa96406174c02156527da1f19", + "gcta_grm.part_2_1.grm.N.bin:md5,0dcc3200354c243fca2de4c023352e66" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_MAKEGRMPART", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T13:29:12.139953008" + }, + "homo_sapiens popgen - plink1 - stub": { + "content": [ + { + "0": [ + [ + { + "id": "gcta_grm_bed", + "part_gcta_job": 1, + "nparts_gcta": 2 + }, + "gcta_grm_bed.part_2_1.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "gcta_grm_bed.part_2_1.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "gcta_grm_bed.part_2_1.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "GCTA_MAKEGRMPART", + "gcta", + "1.94.1" + ] + ], + "grm_files": [ + [ + { + "id": "gcta_grm_bed", + "part_gcta_job": 1, + "nparts_gcta": 2 + }, + "gcta_grm_bed.part_2_1.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "gcta_grm_bed.part_2_1.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "gcta_grm_bed.part_2_1.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_gcta": [ + [ + "GCTA_MAKEGRMPART", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T13:29:17.993546066" + } +} \ No newline at end of file diff --git a/modules/nf-core/gcta/makegrmpart/tests/nextflow.config b/modules/nf-core/gcta/makegrmpart/tests/nextflow.config new file mode 100644 index 000000000000..76b9ab148074 --- /dev/null +++ b/modules/nf-core/gcta/makegrmpart/tests/nextflow.config @@ -0,0 +1,3 @@ +params { + modules_testdata_base_path = 
System.getenv('NF_MODULES_TESTDATA_BASE_PATH') ?: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/'
+}
diff --git a/modules/nf-core/gcta/reml/environment.yml b/modules/nf-core/gcta/reml/environment.yml
new file mode 100644
index 000000000000..3e22ea7b9f20
--- /dev/null
+++ b/modules/nf-core/gcta/reml/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::gcta=1.94.1
diff --git a/modules/nf-core/gcta/reml/main.nf b/modules/nf-core/gcta/reml/main.nf
new file mode 100644
index 000000000000..581d1334913d
--- /dev/null
+++ b/modules/nf-core/gcta/reml/main.nf
@@ -0,0 +1,52 @@
+// Runs `gcta --reml` for one phenotype file against one dense GRM, with optional covariate files
+process GCTA_REML {
+    tag "gcta_reml_${meta.id}_${meta2.id}"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' :
+        'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }"
+
+    input:
+    tuple val(meta), path(phenotypes_file)
+    tuple val(meta2), path(grm_id), path(grm_bin), path(grm_n_bin)
+    tuple val(meta3), path(quant_covariates_file)
+    tuple val(meta4), path(cat_covariates_file)
+
+    output:
+    tuple val(meta), path("${meta.id}.hsq"), emit: reml_results
+    tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    // --grm takes the shared prefix of the .grm.id/.grm.bin/.grm.N.bin files; read it from the staged
+    // file name so the command no longer depends on meta2.id being spelled exactly like that prefix
+    def grm_prefix = grm_id.name.replaceFirst(/\.grm\.id$/, '')
+    // Optional flags render as '' when meta.mpheno is unset or a covariate input is empty ([])
+    def mpheno_param = meta.mpheno ? "--mpheno ${meta.mpheno}" : ''
+    def qcovar_param = quant_covariates_file ? "--qcovar ${quant_covariates_file}" : ''
+    def covar_param = cat_covariates_file ? "--covar ${cat_covariates_file}" : ''
+
+    """
+    set -euo pipefail
+
+    gcta \\
+        --reml \\
+        --grm "${grm_prefix}" \\
+        --pheno ${phenotypes_file} \\
+        ${mpheno_param} \\
+        ${qcovar_param} \\
+        ${covar_param} \\
+        --out "${meta.id}" \\
+        --thread-num ${task.cpus} ${args}
+    """
+
+    stub:
+    """
+    touch "${meta.id}.hsq"
+    """
+}
diff --git a/modules/nf-core/gcta/reml/meta.yml b/modules/nf-core/gcta/reml/meta.yml
new file mode 100644
index 000000000000..1913d1470210
--- /dev/null
+++ b/modules/nf-core/gcta/reml/meta.yml
@@ -0,0 +1,107 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "gcta_reml"
+description: Run univariate REML heritability estimation with a dense GRM
+keywords:
+  - gcta
+  - reml
+  - genetics
+tools:
+  - "gcta":
+      description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data."
+      homepage: "https://yanglab.westlake.edu.cn/software/gcta/"
+      documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf"
+      tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/"
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy map containing phenotype metadata
+          e.g. `[ id:'QuantitativeTrait' ]`
+          Optional phenotype selector may be supplied as `meta.mpheno`
+    - phenotypes_file:
+        type: file
+        description: Phenotype file passed to `--pheno`
+        pattern: "*.{phe,pheno,txt,tsv}"
+        ontologies:
+          - edam: "http://edamontology.org/format_3475"
+  - - meta2:
+        type: map
+        description: |
+          Groovy map containing dense GRM metadata
+          e.g. 
`[ id:'plink_simulated' ]` + - grm_id: + type: file + description: Dense GRM sample identifier file + pattern: "*.grm.id" + ontologies: [] + - grm_bin: + type: file + description: Dense GRM binary matrix file + pattern: "*.grm.bin" + ontologies: [] + - grm_n_bin: + type: file + description: Dense GRM sample-count matrix file + pattern: "*.grm.N.bin" + ontologies: [] + - - meta3: + type: map + description: | + Groovy map containing quantitative covariate metadata + e.g. `[ id:'covariates_quant' ]` + - quant_covariates_file: + type: file + description: Quantitative covariates file, pass `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" + - - meta4: + type: map + description: | + Groovy map containing categorical covariate metadata + e.g. `[ id:'covariates_cat' ]` + - cat_covariates_file: + type: file + description: Categorical covariates file, pass `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" +output: + reml_results: + - - meta: + type: map + description: | + Groovy map containing phenotype metadata + e.g. 
`[ id:'QuantitativeTrait' ]` + Preserves optional `meta.mpheno` when supplied + - "${meta.id}.hsq": + type: file + description: REML result file + pattern: "${meta.id}.hsq" + ontologies: + - edam: "http://edamontology.org/format_2330" + versions_gcta: + - - "${task.process}": + type: string + description: The process the version was collected from + - "gcta": + type: string + description: The tool name + - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'": + type: eval + description: The command used to retrieve the GCTA version +topics: + versions: + - - ${task.process}: + type: string + description: The process the version was collected from + - gcta: + type: string + description: The tool name + - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//': + type: eval + description: The command used to retrieve the GCTA version +authors: + - "@andongni" +maintainers: + - "@andongni" diff --git a/modules/nf-core/gcta/reml/tests/main.nf.test b/modules/nf-core/gcta/reml/tests/main.nf.test new file mode 100644 index 000000000000..81b8a7ce0996 --- /dev/null +++ b/modules/nf-core/gcta/reml/tests/main.nf.test @@ -0,0 +1,194 @@ +nextflow_process { + + name "Test Process GCTA_REML" + script "../main.nf" + process "GCTA_REML" + + tag "modules" + tag "modules_nfcore" + tag "gcta" + tag "gcta/reml" + tag "gcta/makegrmpart" + tag "gawk" + + setup { + run("GAWK", alias: "GAWK_QUANTITATIVE_PHENO") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'QuantitativeTrait' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3 }').collectFile(name:'quantitative_phenotype.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_MULTI_PHENO") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'QuantitativeTraits' ], 
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3, (\$3 * 1.7) + ((NR % 5) / 10.0) }').collectFile(name:'multi_phenotype.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_QUANTITATIVE_COVARIATES") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'covariates_quant' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$4, \$5, \$6 }').collectFile(name:'quantitative_covariates.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_CATEGORICAL_COVARIATES") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'covariates_cat' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3 }').collectFile(name:'categorical_covariates.awk') + input[2] = false + """ + } + } + + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_DENSE") { + script "../../makegrmpart/main.nf" + process { + """ + file('plink_simulated.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'plink_simulated_dense', part_gcta_job:1, nparts_gcta:1 ], + file('plink_simulated.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + input[1] = [[ id:'all_variants' ], []] + """ + } + } + } + + test("homo_sapiens popgen - dense GRM with mpheno selection") { + config 
"./nextflow.config" + when { + process { + """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + multi_pheno = GAWK_MULTI_PHENO.out.output.map { meta, phenotype_file -> + [[ id:'QuantitativeTraitMpheno2', mpheno:2 ], phenotype_file] + } + + input[0] = multi_pheno + input[1] = dense_grm + input[2] = GAWK_QUANTITATIVE_COVARIATES.out.output + input[3] = GAWK_CATEGORICAL_COVARIATES.out.output + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.reml_results.size() == 1 }, + { assert process.out.reml_results.get(0).get(0).id == "QuantitativeTraitMpheno2" }, + { assert process.out.reml_results.get(0).get(0).mpheno == 2 }, + { + assert snapshot( + process.out.reml_results, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - quantitative phenotype with dense GRM and covariates") { + config "./nextflow.config" + when { + process { + """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = GAWK_QUANTITATIVE_PHENO.out.output + input[1] = dense_grm + input[2] = GAWK_QUANTITATIVE_COVARIATES.out.output + input[3] = GAWK_CATEGORICAL_COVARIATES.out.output + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.reml_results.size() == 1 }, + { assert process.out.reml_results.get(0).get(0).id == "QuantitativeTrait" }, + { + assert snapshot( + process.out.reml_results, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - quantitative phenotype with dense GRM - stub") { + options "-stub" + config "./nextflow.config" 
+ + when { + process { + """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = GAWK_QUANTITATIVE_PHENO.out.output + input[1] = dense_grm + input[2] = [[ id:'covariates_quant' ], []] + input[3] = [[ id:'covariates_cat' ], []] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/gcta/reml/tests/main.nf.test.snap b/modules/nf-core/gcta/reml/tests/main.nf.test.snap new file mode 100644 index 000000000000..195b2991921f --- /dev/null +++ b/modules/nf-core/gcta/reml/tests/main.nf.test.snap @@ -0,0 +1,96 @@ +{ + "homo_sapiens popgen - quantitative phenotype with dense GRM - stub": { + "content": [ + { + "0": [ + [ + { + "id": "QuantitativeTrait" + }, + "QuantitativeTrait.hsq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "GCTA_REML", + "gcta", + "1.94.1" + ] + ], + "reml_results": [ + [ + { + "id": "QuantitativeTrait" + }, + "QuantitativeTrait.hsq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_gcta": [ + [ + "GCTA_REML", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T14:14:45.245259136" + }, + "homo_sapiens popgen - quantitative phenotype with dense GRM and covariates": { + "content": [ + [ + [ + { + "id": "QuantitativeTrait" + }, + "QuantitativeTrait.hsq:md5,a1a3eb919cf7aec392435b4bf36ae788" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_REML", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-18T19:01:39.933270203" + }, + "homo_sapiens popgen - dense GRM with mpheno selection": { + "content": [ + [ + [ + { + "id": "QuantitativeTraitMpheno2", + "mpheno": 2 + }, + 
"QuantitativeTraitMpheno2.hsq:md5,47a16182353f1c15a9b1408ee02bdcdc"
+                ]
+            ],
+            {
+                "versions_gcta": [
+                    [
+                        "GCTA_REML",
+                        "gcta",
+                        "1.94.1"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.4"
+        },
+        "timestamp": "2026-03-18T19:01:34.187379358"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/gcta/reml/tests/nextflow.config b/modules/nf-core/gcta/reml/tests/nextflow.config
new file mode 100644
index 000000000000..de31e0218829
--- /dev/null
+++ b/modules/nf-core/gcta/reml/tests/nextflow.config
@@ -0,0 +1,3 @@
+params {
+    modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/"
+}
diff --git a/modules/nf-core/gcta/remlldms/environment.yml b/modules/nf-core/gcta/remlldms/environment.yml
new file mode 100644
index 000000000000..3e22ea7b9f20
--- /dev/null
+++ b/modules/nf-core/gcta/remlldms/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::gcta=1.94.1
diff --git a/modules/nf-core/gcta/remlldms/main.nf b/modules/nf-core/gcta/remlldms/main.nf
new file mode 100644
index 000000000000..e8a5ea6e4c25
--- /dev/null
+++ b/modules/nf-core/gcta/remlldms/main.nf
@@ -0,0 +1,49 @@
+// Runs `gcta --reml-no-constrain` for one phenotype file against the GRMs listed in an --mgrm manifest
+process GCTA_REMLLDMS {
+    tag "gcta_reml_ldms_${meta.id}_${meta2.id}"
+    label 'process_medium'
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' :
+        'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }"
+
+    input:
+    tuple val(meta), path(phenotypes_file)
+    // grm_files are staged alongside the manifest; the command itself only names mgrm_file
+    tuple val(meta2), path(mgrm_file), path(grm_files)
+    tuple val(meta3), path(quant_covariates_file)
+    tuple val(meta4), path(cat_covariates_file)
+
+    output:
+    tuple val(meta), path("${meta.id}.hsq"), emit: reml_results
+    tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    // Each optional flag is emitted only when its source value is truthy; otherwise it renders as ''
+    def covar_opt = cat_covariates_file ? "--covar ${cat_covariates_file}" : ''
+    def qcovar_opt = quant_covariates_file ? "--qcovar ${quant_covariates_file}" : ''
+    def mpheno_opt = meta.mpheno ? "--mpheno ${meta.mpheno}" : ''
+
+    """
+    set -euo pipefail
+
+    gcta \\
+        --reml-no-constrain \\
+        --mgrm ${mgrm_file} \\
+        --pheno ${phenotypes_file} \\
+        ${mpheno_opt} \\
+        ${qcovar_opt} \\
+        ${covar_opt} \\
+        --out "${meta.id}" \\
+        --thread-num ${task.cpus} ${args}
+    """
+
+    stub:
+    """
+    touch "${meta.id}.hsq"
+    """
+}
diff --git a/modules/nf-core/gcta/remlldms/meta.yml b/modules/nf-core/gcta/remlldms/meta.yml
new file mode 100644
index 000000000000..7242477193a7
--- /dev/null
+++ b/modules/nf-core/gcta/remlldms/meta.yml
@@ -0,0 +1,104 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "gcta_remlldms"
+description: Run REML-LDMS heritability estimation with an MGRM manifest
+keywords:
+  - gcta
+  - reml
+  - ldms
+  - genetics
+tools:
+  - "gcta":
+      description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data."
+ homepage: "https://yanglab.westlake.edu.cn/software/gcta/" + documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf" + tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/" +input: + - - meta: + type: map + description: | + Groovy map containing phenotype metadata + e.g. `[ id:'QuantitativeTrait' ]` + Optional phenotype selector may be supplied as `meta.mpheno` + - phenotypes_file: + type: file + description: Phenotype file passed to `--pheno` + pattern: "*.{phe,pheno,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" + - - meta2: + type: map + description: | + Groovy map containing MGRM metadata + e.g. `[ id:'plink_simulated_ldms' ]` + - mgrm_file: + type: file + description: MGRM manifest file + pattern: "*.mgrm" + ontologies: + - edam: "http://edamontology.org/format_2330" + - grm_files: + type: file + description: GRM sidecar files referenced by `mgrm_file` + pattern: "*" + ontologies: [] + - - meta3: + type: map + description: | + Groovy map containing quantitative covariate metadata + e.g. `[ id:'covariates_quant' ]` + - quant_covariates_file: + type: file + description: Quantitative covariates file, pass `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" + - - meta4: + type: map + description: | + Groovy map containing categorical covariate metadata + e.g. `[ id:'covariates_cat' ]` + - cat_covariates_file: + type: file + description: Categorical covariates file, pass `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" +output: + reml_results: + - - meta: + type: map + description: | + Groovy map containing phenotype metadata + e.g. 
`[ id:'QuantitativeTrait' ]` + Preserves optional `meta.mpheno` when supplied + - "${meta.id}.hsq": + type: file + description: REML-LDMS result file + pattern: "${meta.id}.hsq" + ontologies: + - edam: "http://edamontology.org/format_2330" + versions_gcta: + - - "${task.process}": + type: string + description: The process the version was collected from + - "gcta": + type: string + description: The tool name + - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'": + type: eval + description: The command used to retrieve the GCTA version +topics: + versions: + - - ${task.process}: + type: string + description: The process the version was collected from + - gcta: + type: string + description: The tool name + - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//': + type: eval + description: The command used to retrieve the GCTA version +authors: + - "@andongni" +maintainers: + - "@andongni" diff --git a/modules/nf-core/gcta/remlldms/tests/main.nf.test b/modules/nf-core/gcta/remlldms/tests/main.nf.test new file mode 100644 index 000000000000..bd3964215e43 --- /dev/null +++ b/modules/nf-core/gcta/remlldms/tests/main.nf.test @@ -0,0 +1,267 @@ +nextflow_process { + + name "Test Process GCTA_REMLLDMS" + script "../main.nf" + process "GCTA_REMLLDMS" + + tag "modules" + tag "modules_nfcore" + tag "gcta" + tag "gcta/remlldms" + tag "gcta/makegrmpart" + tag "gawk" + + setup { + run("GAWK", alias: "GAWK_QUANTITATIVE_PHENO") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'QuantitativeTrait' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.fam', checkIfExists: true) + ] + input[1] = Channel.of('{ print \$1, \$2, ((NR % 11) - 5) + (((NR * NR) % 7) / 10.0) }').collectFile(name:'quantitative_phenotype.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_MULTI_PHENO") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ 
id:'QuantitativeTraits' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.fam', checkIfExists: true) + ] + input[1] = Channel.of('{ trait1 = ((NR % 11) - 5) + (((NR * NR) % 7) / 10.0); print \$1, \$2, trait1, trait1 + ((NR % 5) / 10.0) }').collectFile(name:'multi_phenotype.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_QUANTITATIVE_COVARIATES") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'covariates_quant' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.fam', checkIfExists: true) + ] + input[1] = Channel.of('{ print \$1, \$2, ((NR % 9) - 4) / 3.0 }').collectFile(name:'quantitative_covariates.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_CATEGORICAL_COVARIATES") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'covariates_cat' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.fam', checkIfExists: true) + ] + input[1] = Channel.of('{ print \$1, \$2, (NR % 2) + 1 }').collectFile(name:'categorical_covariates.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_LDMS1_VARIANTS") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'bfile_ldms1_variants' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.bim', checkIfExists: true) + ] + input[1] = Channel.of('NR <= 800 { print \$2 }').collectFile(name:'ldms1_variants.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_LDMS2_VARIANTS") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'bfile_ldms2_variants' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.bim', checkIfExists: true) + ] + input[1] = Channel.of('NR > 800 { print \$2 }').collectFile(name:'ldms2_variants.awk') + input[2] = false + """ + } + } + + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_LDMS1") { + 
script "../../makegrmpart/main.nf" + process { + """ + file('bfile.mbfile').text = 'bfile\\n' + + input[0] = [ + [ id:'bfile_ldms1', part_gcta_job:1, nparts_gcta:1 ], + file('bfile.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.fam', checkIfExists: true) + ] + ] + input[1] = GAWK_LDMS1_VARIANTS.out.output + """ + } + } + + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_LDMS2") { + script "../../makegrmpart/main.nf" + process { + """ + file('bfile.mbfile').text = 'bfile\\n' + + input[0] = [ + [ id:'bfile_ldms2', part_gcta_job:1, nparts_gcta:1 ], + file('bfile.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/gsmr/bfile/bfile.fam', checkIfExists: true) + ] + ] + input[1] = GAWK_LDMS2_VARIANTS.out.output + """ + } + } + } + + test("homo_sapiens gsmr - ldms mgrm with mpheno selection") { + config "./nextflow.config" + when { + process { + """ + mgrm_file = Channel + .of('bfile_ldms1.part_1_1\\nbfile_ldms2.part_1_1') + .collectFile(name:'bfile_ldms.mgrm', newLine: true) + + ldms_grm_files = GCTA_MAKEGRMPART_LDMS1.out.grm_files + .mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files) + .map { meta, grm_id, grm_bin, grm_n_bin -> [grm_id, grm_bin, grm_n_bin] } + .collect() + .map { rows -> rows.flatten() } + + multi_pheno = GAWK_MULTI_PHENO.out.output.map { meta, phenotype_file -> + [[ id:'QuantitativeTraitMpheno2', mpheno:2 ], phenotype_file] + } + + input[0] = multi_pheno + input[1] = mgrm_file + .combine(ldms_grm_files) + .map { row 
-> [[ id:'bfile_ldms' ], row[0], row[1..-1]] } + input[2] = GAWK_QUANTITATIVE_COVARIATES.out.output + input[3] = GAWK_CATEGORICAL_COVARIATES.out.output + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.reml_results.size() == 1 }, + { assert process.out.reml_results.get(0).get(0).id == "QuantitativeTraitMpheno2" }, + { assert process.out.reml_results.get(0).get(0).mpheno == 2 }, + { + assert snapshot( + process.out.reml_results, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens gsmr - quantitative phenotype with ldms mgrm and covariates") { + config "./nextflow.config" + when { + process { + """ + mgrm_file = Channel + .of('bfile_ldms1.part_1_1\\nbfile_ldms2.part_1_1') + .collectFile(name:'bfile_ldms.mgrm', newLine: true) + + ldms_grm_files = GCTA_MAKEGRMPART_LDMS1.out.grm_files + .mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files) + .map { meta, grm_id, grm_bin, grm_n_bin -> [grm_id, grm_bin, grm_n_bin] } + .collect() + .map { rows -> rows.flatten() } + + input[0] = GAWK_QUANTITATIVE_PHENO.out.output + input[1] = mgrm_file + .combine(ldms_grm_files) + .map { row -> [[ id:'bfile_ldms' ], row[0], row[1..-1]] } + input[2] = GAWK_QUANTITATIVE_COVARIATES.out.output + input[3] = GAWK_CATEGORICAL_COVARIATES.out.output + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.reml_results.size() == 1 }, + { assert process.out.reml_results.get(0).get(0).id == "QuantitativeTrait" }, + { + assert snapshot( + process.out.reml_results, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens gsmr - quantitative phenotype with ldms mgrm - stub") { + options "-stub" + config "./nextflow.config" + + when { + process { + """ + mgrm_file = Channel + .of('bfile_ldms1.part_1_1\\nbfile_ldms2.part_1_1') + .collectFile(name:'bfile_ldms.mgrm', newLine: true) + + ldms_grm_files = 
GCTA_MAKEGRMPART_LDMS1.out.grm_files + .mix(GCTA_MAKEGRMPART_LDMS2.out.grm_files) + .map { meta, grm_id, grm_bin, grm_n_bin -> [grm_id, grm_bin, grm_n_bin] } + .collect() + .map { rows -> rows.flatten() } + + input[0] = GAWK_QUANTITATIVE_PHENO.out.output + input[1] = mgrm_file + .combine(ldms_grm_files) + .map { row -> [[ id:'bfile_ldms' ], row[0], row[1..-1]] } + input[2] = [[ id:'covariates_quant' ], []] + input[3] = [[ id:'covariates_cat' ], []] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/gcta/remlldms/tests/main.nf.test.snap b/modules/nf-core/gcta/remlldms/tests/main.nf.test.snap new file mode 100644 index 000000000000..1fd85202b4af --- /dev/null +++ b/modules/nf-core/gcta/remlldms/tests/main.nf.test.snap @@ -0,0 +1,96 @@ +{ + "homo_sapiens gsmr - quantitative phenotype with ldms mgrm - stub": { + "content": [ + { + "0": [ + [ + { + "id": "QuantitativeTrait" + }, + "QuantitativeTrait.hsq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "GCTA_REMLLDMS", + "gcta", + "1.94.1" + ] + ], + "reml_results": [ + [ + { + "id": "QuantitativeTrait" + }, + "QuantitativeTrait.hsq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_gcta": [ + [ + "GCTA_REMLLDMS", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T15:57:32.371358163" + }, + "homo_sapiens gsmr - ldms mgrm with mpheno selection": { + "content": [ + [ + [ + { + "id": "QuantitativeTraitMpheno2", + "mpheno": 2 + }, + "QuantitativeTraitMpheno2.hsq:md5,2ceb8590010a6e274f8339b3c77e18ef" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_REMLLDMS", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-16T19:11:03.430185704" + }, + "homo_sapiens gsmr - quantitative phenotype with ldms mgrm and covariates": { + "content": [ + [ + [ + { + "id": 
"QuantitativeTrait" + }, + "QuantitativeTrait.hsq:md5,3a0b00fba467f22b6090f0233188856a" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_REMLLDMS", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-16T19:11:15.129305749" + } +} diff --git a/modules/nf-core/gcta/remlldms/tests/nextflow.config b/modules/nf-core/gcta/remlldms/tests/nextflow.config new file mode 100644 index 000000000000..de31e0218829 --- /dev/null +++ b/modules/nf-core/gcta/remlldms/tests/nextflow.config @@ -0,0 +1,3 @@ +params { + modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/" +} diff --git a/modules/nf-core/gcta/removerelatedsubjects/environment.yml b/modules/nf-core/gcta/removerelatedsubjects/environment.yml new file mode 100644 index 000000000000..3e22ea7b9f20 --- /dev/null +++ b/modules/nf-core/gcta/removerelatedsubjects/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gcta=1.94.1 diff --git a/modules/nf-core/gcta/removerelatedsubjects/main.nf b/modules/nf-core/gcta/removerelatedsubjects/main.nf new file mode 100644 index 000000000000..3f90d2f388eb --- /dev/null +++ b/modules/nf-core/gcta/removerelatedsubjects/main.nf @@ -0,0 +1,39 @@ +process GCTA_REMOVERELATEDSUBJECTS { + tag "${meta.id}" + label 'process_medium' + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' : + 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }" + + input: + tuple val(meta), path(grm_id), path(grm_bin), path(grm_n_bin) + + output: + tuple val(meta), path("${meta.id}_unrel05.grm.id"), path("${meta.id}_unrel05.grm.bin"), path("${meta.id}_unrel05.grm.N.bin"), emit: grm_files + tuple val(meta), path("${meta.id}_unrel05.grm.id"), emit: keep_file + tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def grm_prefix = grm_id.name.replaceFirst(/\.grm\.id$/, '') // prefix of the staged GRM files as passed in; it need not equal meta.id + """ + gcta \\ + --grm ${grm_prefix} \\ + --grm-cutoff 0.05 \\ + --make-grm \\ + --out ${meta.id}_unrel05 \\ + --thread-num ${task.cpus} \\ + ${args} + """ + + stub: + """ + touch ${meta.id}_unrel05.grm.id + touch ${meta.id}_unrel05.grm.bin + touch ${meta.id}_unrel05.grm.N.bin + """ +} diff --git a/modules/nf-core/gcta/removerelatedsubjects/meta.yml b/modules/nf-core/gcta/removerelatedsubjects/meta.yml new file mode 100644 index 000000000000..e8085c2564ee --- /dev/null +++ b/modules/nf-core/gcta/removerelatedsubjects/meta.yml @@ -0,0 +1,96 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "gcta_removerelatedsubjects" +description: Remove related individuals from a dense GRM using `gcta --grm-cutoff` +keywords: + - gcta + - grm + - genetics +tools: + - "gcta": + description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data."
+ homepage: "https://yanglab.westlake.edu.cn/software/gcta/" + documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf" + tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/" + +input: + - - meta: + type: map + description: | + Groovy map containing dense GRM metadata + e.g. `[ id:'plink_simulated' ]` + - grm_id: + type: file + description: Dense GRM sample identifier file + pattern: "*.grm.id" + ontologies: [] + - grm_bin: + type: file + description: Dense GRM binary matrix file + pattern: "*.grm.bin" + ontologies: [] + - grm_n_bin: + type: file + description: Dense GRM sample-count matrix file + pattern: "*.grm.N.bin" + ontologies: [] + +output: + grm_files: + - - meta: + type: map + description: | + Groovy map containing dense GRM metadata + e.g. `[ id:'plink_simulated' ]` + - "${meta.id}_unrel05.grm.id": + type: file + description: Relatedness-filtered GRM sample identifier file + pattern: "${meta.id}_unrel05.grm.id" + ontologies: [] + - "${meta.id}_unrel05.grm.bin": + type: file + description: Relatedness-filtered GRM binary matrix file + pattern: "${meta.id}_unrel05.grm.bin" + ontologies: [] + - "${meta.id}_unrel05.grm.N.bin": + type: file + description: Relatedness-filtered GRM sample-count matrix file + pattern: "${meta.id}_unrel05.grm.N.bin" + ontologies: [] + keep_file: + - - meta: + type: map + description: | + Groovy map containing dense GRM metadata + e.g. 
`[ id:'plink_simulated' ]` + - "${meta.id}_unrel05.grm.id": + type: file + description: Keep file of unrelated individuals emitted by GCTA + pattern: "${meta.id}_unrel05.grm.id" + ontologies: [] + versions_gcta: + - - "${task.process}": + type: string + description: The process the version was collected from + - "gcta": + type: string + description: The tool name + - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'": + type: eval + description: The command used to retrieve the GCTA version + +topics: + versions: + - - ${task.process}: + type: string + description: The process the version was collected from + - gcta: + type: string + description: The tool name + - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//': + type: eval + description: The command used to retrieve the GCTA version + +authors: + - "@andongni" +maintainers: + - "@andongni" diff --git a/modules/nf-core/gcta/removerelatedsubjects/tests/main.nf.test b/modules/nf-core/gcta/removerelatedsubjects/tests/main.nf.test new file mode 100644 index 000000000000..2716ea7a9970 --- /dev/null +++ b/modules/nf-core/gcta/removerelatedsubjects/tests/main.nf.test @@ -0,0 +1,96 @@ +nextflow_process { + + name "Test Process GCTA_REMOVERELATEDSUBJECTS" + script "../main.nf" + process "GCTA_REMOVERELATEDSUBJECTS" + + tag "modules" + tag "modules_nfcore" + tag "gcta" + tag "gcta/removerelatedsubjects" + tag "gcta/makegrmpart" + + setup { + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_DENSE") { + script "../../makegrmpart/main.nf" + process { + """ + file('plink_simulated.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'plink_simulated_dense', part_gcta_job:1, nparts_gcta:1 ], + file('plink_simulated.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', 
checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + input[1] = [[ id:'all_variants' ], []] + """ + } + } + } + + test("homo_sapiens popgen - remove related individuals from dense GRM") { + config "./nextflow.config" + + when { + process { + """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = dense_grm + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.grm_files.size() == 1 }, + { assert process.out.keep_file.size() == 1 }, + { assert process.out.grm_files.get(0).get(0).id == "plink_simulated_dense.part_1_1" }, + { + assert snapshot( + process.out.grm_files, + process.out.keep_file, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - remove related individuals from dense GRM - stub") { + options "-stub" + config "./nextflow.config" + + when { + process { + """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = dense_grm + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/gcta/removerelatedsubjects/tests/main.nf.test.snap b/modules/nf-core/gcta/removerelatedsubjects/tests/main.nf.test.snap new file mode 100644 index 000000000000..0751d240524d --- /dev/null +++ b/modules/nf-core/gcta/removerelatedsubjects/tests/main.nf.test.snap @@ -0,0 +1,99 @@ +{ + "homo_sapiens popgen - remove related individuals from dense GRM": { + "content": [ + [ + [ + { + "id": "plink_simulated_dense.part_1_1" 
+ }, + "plink_simulated_dense.part_1_1_unrel05.grm.id:md5,ca8c0bded6951fdd3bf0dddc97b6df6b", + "plink_simulated_dense.part_1_1_unrel05.grm.bin:md5,b1f124463eecbae86840a6651eec372d", + "plink_simulated_dense.part_1_1_unrel05.grm.N.bin:md5,06b73ea8bae8f1e5f5d4de33dbd2c75e" + ] + ], + [ + [ + { + "id": "plink_simulated_dense.part_1_1" + }, + "plink_simulated_dense.part_1_1_unrel05.grm.id:md5,ca8c0bded6951fdd3bf0dddc97b6df6b" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_REMOVERELATEDSUBJECTS", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T15:36:41.961471309" + }, + "homo_sapiens popgen - remove related individuals from dense GRM - stub": { + "content": [ + { + "0": [ + [ + { + "id": "plink_simulated_dense.part_1_1" + }, + "plink_simulated_dense.part_1_1_unrel05.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_dense.part_1_1_unrel05.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_dense.part_1_1_unrel05.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "plink_simulated_dense.part_1_1" + }, + "plink_simulated_dense.part_1_1_unrel05.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + "GCTA_REMOVERELATEDSUBJECTS", + "gcta", + "1.94.1" + ] + ], + "grm_files": [ + [ + { + "id": "plink_simulated_dense.part_1_1" + }, + "plink_simulated_dense.part_1_1_unrel05.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_dense.part_1_1_unrel05.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "plink_simulated_dense.part_1_1_unrel05.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "keep_file": [ + [ + { + "id": "plink_simulated_dense.part_1_1" + }, + "plink_simulated_dense.part_1_1_unrel05.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_gcta": [ + [ + "GCTA_REMOVERELATEDSUBJECTS", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + 
"timestamp": "2026-03-13T15:36:48.571305038" + } +} \ No newline at end of file diff --git a/modules/nf-core/gcta/removerelatedsubjects/tests/nextflow.config b/modules/nf-core/gcta/removerelatedsubjects/tests/nextflow.config new file mode 100644 index 000000000000..de31e0218829 --- /dev/null +++ b/modules/nf-core/gcta/removerelatedsubjects/tests/nextflow.config @@ -0,0 +1,3 @@ +params { + modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/" +}