From 4200d90bcb889513261f4ce5117ed03cba26c7ad Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Fri, 20 Mar 2026 23:12:13 +0800 Subject: [PATCH 1/5] feat: add gcta/reml module --- modules/nf-core/gcta/reml/environment.yml | 7 + modules/nf-core/gcta/reml/main.nf | 57 +++++ modules/nf-core/gcta/reml/meta.yml | 110 ++++++++++ modules/nf-core/gcta/reml/tests/main.nf.test | 196 ++++++++++++++++++ .../nf-core/gcta/reml/tests/main.nf.test.snap | 95 +++++++++ .../nf-core/gcta/reml/tests/nextflow.config | 3 + 6 files changed, 468 insertions(+) create mode 100644 modules/nf-core/gcta/reml/environment.yml create mode 100644 modules/nf-core/gcta/reml/main.nf create mode 100644 modules/nf-core/gcta/reml/meta.yml create mode 100644 modules/nf-core/gcta/reml/tests/main.nf.test create mode 100644 modules/nf-core/gcta/reml/tests/main.nf.test.snap create mode 100644 modules/nf-core/gcta/reml/tests/nextflow.config diff --git a/modules/nf-core/gcta/reml/environment.yml b/modules/nf-core/gcta/reml/environment.yml new file mode 100644 index 000000000000..3e22ea7b9f20 --- /dev/null +++ b/modules/nf-core/gcta/reml/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gcta=1.94.1 diff --git a/modules/nf-core/gcta/reml/main.nf b/modules/nf-core/gcta/reml/main.nf new file mode 100644 index 000000000000..827faf927b68 --- /dev/null +++ b/modules/nf-core/gcta/reml/main.nf @@ -0,0 +1,57 @@ +// Runs GCTA REML (`gcta --reml`) for one phenotype column against a GRM and emits the resulting `<prefix>.hsq` file. +// Covariate files are optional (pass `[]` when absent); `mpheno` selects the phenotype column and defaults to 1. +process GCTA_REML { + tag "gcta_reml_${meta.id}_${meta2.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' : + 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }" + + input: + tuple val(meta), path(phenotypes_file) + tuple val(meta2), path(grm_id), path(grm_bin), path(grm_n_bin) + tuple val(meta3), path(quant_covariates_file) + tuple val(meta4), path(cat_covariates_file) + val(mpheno) + + output: + tuple val(meta), path("*.hsq"), emit: reml_results + tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + // Fall back to phenotype column 1 when mpheno is null or an empty collection (e.g. `[]`) + def mpheno_value = (mpheno == null || (mpheno instanceof Collection && mpheno.isEmpty())) ? 1 : mpheno + def mpheno_param = "--mpheno ${mpheno_value}" + // Covariate flags are only emitted when a file was staged; an empty input (`[]`) leaves them out + def qcovar_param = quant_covariates_file ? "--qcovar ${quant_covariates_file}" : '' + def covar_param = cat_covariates_file ?
"--covar ${cat_covariates_file}" : '' + def extra_args = task.ext.args ?: '' + // --grm takes the common prefix of the .grm.id/.grm.bin/.grm.N.bin files; derive it from the staged file name rather than assuming it equals meta2.id + def grm_prefix = grm_id.name.replaceFirst(/\.grm\.id$/, '') + + """ + set -euo pipefail + + gcta \\ + --reml \\ + --grm "${grm_prefix}" \\ + --pheno ${phenotypes_file} \\ + ${mpheno_param} \\ + ${qcovar_param} \\ + ${covar_param} \\ + --out "${prefix}" \\ + --thread-num ${task.cpus} ${extra_args} + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch "${prefix}.hsq" + """ +} diff --git a/modules/nf-core/gcta/reml/meta.yml b/modules/nf-core/gcta/reml/meta.yml new file mode 100644 index 000000000000..8f6b97f5e7a2 --- /dev/null +++ b/modules/nf-core/gcta/reml/meta.yml @@ -0,0 +1,110 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "gcta_reml" +description: Run univariate REML heritability estimation with a dense GRM +keywords: + - gcta + - reml + - genetics +tools: + - "gcta": + description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data." + homepage: "https://yanglab.westlake.edu.cn/software/gcta/" + documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf" + tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/" +input: + - - meta: + type: map + description: | + Groovy map containing phenotype metadata + e.g. `[ id:'QuantitativeTrait' ]` + - phenotypes_file: + type: file + description: Phenotype file passed to `--pheno` + pattern: "*.{phe,pheno,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" + - - meta2: + type: map + description: | + Groovy map containing dense GRM metadata + e.g. 
`[ id:'plink_simulated' ]` + - grm_id: + type: file + description: Dense GRM sample identifier file + pattern: "*.grm.id" + ontologies: [] + - grm_bin: + type: file + description: Dense GRM binary matrix file + pattern: "*.grm.bin" + ontologies: [] + - grm_n_bin: + type: file + description: Dense GRM binary file with the number of SNPs used for each matrix element + pattern: "*.grm.N.bin" + ontologies: [] + - - meta3: + type: map + description: | + Groovy map containing quantitative covariate metadata + e.g. `[ id:'covariates_quant' ]` + - quant_covariates_file: + type: file + description: Quantitative covariates file, pass `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" + - - meta4: + type: map + description: | + Groovy map containing categorical covariate metadata + e.g. `[ id:'covariates_cat' ]` + - cat_covariates_file: + type: file + description: Categorical covariates file, pass `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" + - mpheno: + type: integer + description: | + Phenotype column selector passed to `--mpheno`. + Pass `1` for the first phenotype column; an empty list `[]` also falls back to `1`. +output: + reml_results: + - - meta: + type: map + description: | + Groovy map containing phenotype metadata + e.g. 
`[ id:'QuantitativeTrait' ]` + - "*.hsq": + type: file + description: REML result file + pattern: "*.{hsq}" + ontologies: + - edam: "http://edamontology.org/format_2330" + versions_gcta: + - - "${task.process}": + type: string + description: The process the version was collected from + - "gcta": + type: string + description: The tool name + - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'": + type: eval + description: The command used to retrieve the GCTA version +topics: + versions: + - - ${task.process}: + type: string + description: The process the version was collected from + - gcta: + type: string + description: The tool name + - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//': + type: eval + description: The command used to retrieve the GCTA version +authors: + - "@andongni" +maintainers: + - "@andongni" diff --git a/modules/nf-core/gcta/reml/tests/main.nf.test b/modules/nf-core/gcta/reml/tests/main.nf.test new file mode 100644 index 000000000000..ff9f5e12d16f --- /dev/null +++ b/modules/nf-core/gcta/reml/tests/main.nf.test @@ -0,0 +1,196 @@ +nextflow_process { + + name "Test Process GCTA_REML" + script "../main.nf" + process "GCTA_REML" + + tag "modules" + tag "modules_nfcore" + tag "gcta" + tag "gcta/reml" + tag "gcta/makegrmpart" + tag "gawk" + + setup { + run("GAWK", alias: "GAWK_QUANTITATIVE_PHENO") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'QuantitativeTrait' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3 }').collectFile(name:'quantitative_phenotype.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_MULTI_PHENO") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'QuantitativeTraits' ], + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3, (\$3 * 1.7) + ((NR % 5) / 10.0) }').collectFile(name:'multi_phenotype.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_QUANTITATIVE_COVARIATES") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'covariates_quant' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$4, \$5, \$6 }').collectFile(name:'quantitative_covariates.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_CATEGORICAL_COVARIATES") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'covariates_cat' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3 }').collectFile(name:'categorical_covariates.awk') + input[2] = false + """ + } + } + + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_DENSE") { + script "../../makegrmpart/main.nf" + process { + """ + file('plink_simulated.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'plink_simulated_dense', part_gcta_job:1, nparts_gcta:1 ], + file('plink_simulated.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + input[1] = [[ id:'all_variants' ], []] + """ + } + } + } + + test("homo_sapiens popgen - dense GRM with mpheno selection") { + config "./nextflow.config" + when { + process { + 
""" + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + multi_pheno = GAWK_MULTI_PHENO.out.output.map { meta, phenotype_file -> + [[ id:'QuantitativeTraitMpheno2' ], phenotype_file] + } + + input[0] = multi_pheno + input[1] = dense_grm + input[2] = GAWK_QUANTITATIVE_COVARIATES.out.output + input[3] = GAWK_CATEGORICAL_COVARIATES.out.output + input[4] = 2 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.reml_results.size() == 1 }, + { assert process.out.reml_results.get(0).get(0).id == "QuantitativeTraitMpheno2" }, + { + assert snapshot( + process.out.reml_results, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - quantitative phenotype with dense GRM and covariates") { + config "./nextflow.config" + when { + process { + """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = GAWK_QUANTITATIVE_PHENO.out.output + input[1] = dense_grm + input[2] = GAWK_QUANTITATIVE_COVARIATES.out.output + input[3] = GAWK_CATEGORICAL_COVARIATES.out.output + input[4] = 1 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.reml_results.size() == 1 }, + { assert process.out.reml_results.get(0).get(0).id == "QuantitativeTrait" }, + { + assert snapshot( + process.out.reml_results, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - quantitative phenotype with dense GRM - stub") { + options "-stub" + config "./nextflow.config" + + when { + process { + """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { 
meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + + input[0] = GAWK_QUANTITATIVE_PHENO.out.output + input[1] = dense_grm + input[2] = [[ id:'covariates_quant' ], []] + input[3] = [[ id:'covariates_cat' ], []] + input[4] = 1 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/gcta/reml/tests/main.nf.test.snap b/modules/nf-core/gcta/reml/tests/main.nf.test.snap new file mode 100644 index 000000000000..cd4fd313db70 --- /dev/null +++ b/modules/nf-core/gcta/reml/tests/main.nf.test.snap @@ -0,0 +1,95 @@ +{ + "homo_sapiens popgen - quantitative phenotype with dense GRM - stub": { + "content": [ + { + "0": [ + [ + { + "id": "QuantitativeTrait" + }, + "QuantitativeTrait.hsq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "GCTA_REML", + "gcta", + "1.94.1" + ] + ], + "reml_results": [ + [ + { + "id": "QuantitativeTrait" + }, + "QuantitativeTrait.hsq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_gcta": [ + [ + "GCTA_REML", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-13T14:14:45.245259136" + }, + "homo_sapiens popgen - quantitative phenotype with dense GRM and covariates": { + "content": [ + [ + [ + { + "id": "QuantitativeTrait" + }, + "QuantitativeTrait.hsq:md5,a1a3eb919cf7aec392435b4bf36ae788" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_REML", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-18T19:01:39.933270203" + }, + "homo_sapiens popgen - dense GRM with mpheno selection": { + "content": [ + [ + [ + { + "id": "QuantitativeTraitMpheno2" + }, + "QuantitativeTraitMpheno2.hsq:md5,47a16182353f1c15a9b1408ee02bdcdc" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_REML", + "gcta", + 
"1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-20T21:28:38.137634953" + } +} \ No newline at end of file diff --git a/modules/nf-core/gcta/reml/tests/nextflow.config b/modules/nf-core/gcta/reml/tests/nextflow.config new file mode 100644 index 000000000000..de31e0218829 --- /dev/null +++ b/modules/nf-core/gcta/reml/tests/nextflow.config @@ -0,0 +1,3 @@ +params { + modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/" +} From 1352c09d917f0f50078e9608b7c6cfa257657e75 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Sat, 21 Mar 2026 15:53:18 +0800 Subject: [PATCH 2/5] test(gcta/reml): cover mpheno and no-covariate paths --- modules/nf-core/gcta/reml/tests/main.nf.test | 364 +++++++++++++----- .../nf-core/gcta/reml/tests/main.nf.test.snap | 79 +++- 2 files changed, 318 insertions(+), 125 deletions(-) diff --git a/modules/nf-core/gcta/reml/tests/main.nf.test b/modules/nf-core/gcta/reml/tests/main.nf.test index ff9f5e12d16f..0fbdc3bb24d5 100644 --- a/modules/nf-core/gcta/reml/tests/main.nf.test +++ b/modules/nf-core/gcta/reml/tests/main.nf.test @@ -8,111 +8,162 @@ nextflow_process { tag "modules_nfcore" tag "gcta" tag "gcta/reml" - tag "gcta/makegrmpart" - tag "gawk" - setup { - run("GAWK", alias: "GAWK_QUANTITATIVE_PHENO") { - script "../../../gawk/main.nf" + test("homo_sapiens popgen - dense GRM with mpheno selection") { + config "./nextflow.config" + when { process { - """ - input[0] = [ - [ id:'QuantitativeTrait' ], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) - ] - input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3 }').collectFile(name:'quantitative_phenotype.awk') - input[2] = false - """ - } - } + ''' + def sample_ids = (1..150).collect { idx -> "SAMPLE_${idx}" } + def write_float_binary = { filename, 
values -> + def buffer = java.nio.ByteBuffer.allocate(values.size() * 4).order(java.nio.ByteOrder.LITTLE_ENDIAN) + values.each { value -> buffer.putFloat((value as Number).floatValue()) } + file(filename).bytes = buffer.array() + } - run("GAWK", alias: "GAWK_MULTI_PHENO") { - script "../../../gawk/main.nf" - process { - """ - input[0] = [ - [ id:'QuantitativeTraits' ], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) - ] - input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3, (\$3 * 1.7) + ((NR % 5) / 10.0) }').collectFile(name:'multi_phenotype.awk') - input[2] = false - """ + file('dense.grm.id').text = sample_ids.collect { sid -> "${sid} ${sid}" }.join('\\n') + '\\n' + + def dense_relationships = [] + for (int i = 0; i < sample_ids.size(); i++) { + for (int j = 0; j <= i; j++) { + dense_relationships << Math.exp(-Math.abs(i - j) / 20.0d).floatValue() + } + } + write_float_binary('dense.grm.bin', dense_relationships) + write_float_binary('dense.grm.N.bin', dense_relationships.collect { 2500.0f }) + + file('multi_phenotypes.phe').text = sample_ids.withIndex().collect { sid, idx -> + def trait1 = ((idx % 13) - 6) + ((idx % 4) / 10.0) + def trait2 = trait1 + ((idx % 5) / 10.0) + "${sid} ${sid} ${trait1} ${trait2}" + }.join('\\n') + '\\n' + + file('quantitative_covariates.txt').text = sample_ids.withIndex().collect { sid, idx -> + "${sid} ${sid} ${((idx % 9) - 4) / 3.0} ${((idx % 7) - 3) / 2.5}" + }.join('\\n') + '\\n' + + file('categorical_covariates.txt').text = sample_ids.withIndex().collect { sid, idx -> + "${sid} ${sid} ${(idx % 3) + 1}" + }.join('\\n') + '\\n' + + input[0] = [[ id:'QuantitativeTraitMpheno2' ], file('multi_phenotypes.phe')] + input[1] = [[ id:'dense' ], file('dense.grm.id'), file('dense.grm.bin'), file('dense.grm.N.bin')] + input[2] = [[ id:'covariates_quant' ], file('quantitative_covariates.txt')] + input[3] = [[ id:'covariates_cat' ], 
file('categorical_covariates.txt')] + input[4] = 2 + ''' } } - run("GAWK", alias: "GAWK_QUANTITATIVE_COVARIATES") { - script "../../../gawk/main.nf" - process { - """ - input[0] = [ - [ id:'covariates_quant' ], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) - ] - input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$4, \$5, \$6 }').collectFile(name:'quantitative_covariates.awk') - input[2] = false - """ - } + then { + assertAll( + { assert process.success }, + { assert process.out.reml_results.size() == 1 }, + { assert process.out.reml_results.get(0).get(0).id == "QuantitativeTraitMpheno2" }, + { + assert snapshot( + process.out.reml_results, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) } + } - run("GAWK", alias: "GAWK_CATEGORICAL_COVARIATES") { - script "../../../gawk/main.nf" + test("homo_sapiens popgen - quantitative phenotype with dense GRM and covariates") { + config "./nextflow.config" + when { process { - """ - input[0] = [ - [ id:'covariates_cat' ], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) - ] - input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3 }').collectFile(name:'categorical_covariates.awk') - input[2] = false - """ + ''' + def sample_ids = (1..150).collect { idx -> "SAMPLE_${idx}" } + def write_float_binary = { filename, values -> + def buffer = java.nio.ByteBuffer.allocate(values.size() * 4).order(java.nio.ByteOrder.LITTLE_ENDIAN) + values.each { value -> buffer.putFloat((value as Number).floatValue()) } + file(filename).bytes = buffer.array() + } + + file('dense.grm.id').text = sample_ids.collect { sid -> "${sid} ${sid}" }.join('\\n') + '\\n' + + def dense_relationships = [] + for (int i = 0; i < sample_ids.size(); i++) { + for (int j = 0; j <= i; j++) { + dense_relationships << Math.exp(-Math.abs(i - j) / 20.0d).floatValue() + } 
+ } + write_float_binary('dense.grm.bin', dense_relationships) + write_float_binary('dense.grm.N.bin', dense_relationships.collect { 2500.0f }) + + file('quantitative_phenotypes.phe').text = sample_ids.withIndex().collect { sid, idx -> + def trait1 = ((idx % 13) - 6) + ((idx % 4) / 10.0) + "${sid} ${sid} ${trait1}" + }.join('\\n') + '\\n' + + file('quantitative_covariates.txt').text = sample_ids.withIndex().collect { sid, idx -> + "${sid} ${sid} ${((idx % 9) - 4) / 3.0} ${((idx % 7) - 3) / 2.5}" + }.join('\\n') + '\\n' + + file('categorical_covariates.txt').text = sample_ids.withIndex().collect { sid, idx -> + "${sid} ${sid} ${(idx % 3) + 1}" + }.join('\\n') + '\\n' + + input[0] = [[ id:'QuantitativeTrait' ], file('quantitative_phenotypes.phe')] + input[1] = [[ id:'dense' ], file('dense.grm.id'), file('dense.grm.bin'), file('dense.grm.N.bin')] + input[2] = [[ id:'covariates_quant' ], file('quantitative_covariates.txt')] + input[3] = [[ id:'covariates_cat' ], file('categorical_covariates.txt')] + input[4] = 1 + ''' } } - run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_DENSE") { - script "../../makegrmpart/main.nf" - process { - """ - file('plink_simulated.mbfile').text = 'plink_simulated\\n' - - input[0] = [ - [ id:'plink_simulated_dense', part_gcta_job:1, nparts_gcta:1 ], - file('plink_simulated.mbfile'), - [ - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) - ], - [ - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) - ], - [ - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) - ] - ] - input[1] = [[ id:'all_variants' ], []] - """ - } + then { + assertAll( + { assert process.success }, + { assert process.out.reml_results.size() == 1 }, + { assert process.out.reml_results.get(0).get(0).id == "QuantitativeTrait" }, + { + assert snapshot( + process.out.reml_results, + 
process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) } } - test("homo_sapiens popgen - dense GRM with mpheno selection") { + test("homo_sapiens popgen - quantitative phenotype with dense GRM and no covariates") { config "./nextflow.config" when { process { - """ - dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> - def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job - [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + ''' + def sample_ids = (1..150).collect { idx -> "SAMPLE_${idx}" } + def write_float_binary = { filename, values -> + def buffer = java.nio.ByteBuffer.allocate(values.size() * 4).order(java.nio.ByteOrder.LITTLE_ENDIAN) + values.each { value -> buffer.putFloat((value as Number).floatValue()) } + file(filename).bytes = buffer.array() } - multi_pheno = GAWK_MULTI_PHENO.out.output.map { meta, phenotype_file -> - [[ id:'QuantitativeTraitMpheno2' ], phenotype_file] + file('dense.grm.id').text = sample_ids.collect { sid -> "${sid} ${sid}" }.join('\\n') + '\\n' + + def dense_relationships = [] + for (int i = 0; i < sample_ids.size(); i++) { + for (int j = 0; j <= i; j++) { + dense_relationships << Math.exp(-Math.abs(i - j) / 20.0d).floatValue() + } } + write_float_binary('dense.grm.bin', dense_relationships) + write_float_binary('dense.grm.N.bin', dense_relationships.collect { 2500.0f }) - input[0] = multi_pheno - input[1] = dense_grm - input[2] = GAWK_QUANTITATIVE_COVARIATES.out.output - input[3] = GAWK_CATEGORICAL_COVARIATES.out.output - input[4] = 2 - """ + file('quantitative_phenotypes.phe').text = sample_ids.withIndex().collect { sid, idx -> + def trait1 = ((idx % 13) - 6) + ((idx % 4) / 10.0) + "${sid} ${sid} ${trait1}" + }.join('\\n') + '\\n' + + input[0] = [[ id:'QuantitativeTrait' ], file('quantitative_phenotypes.phe')] + input[1] = [[ id:'dense' ], file('dense.grm.id'), file('dense.grm.bin'), file('dense.grm.N.bin')] + input[2] = [[ id:'covariates_quant' ], 
[]] + input[3] = [[ id:'covariates_cat' ], []] + input[4] = 1 + ''' } } @@ -120,7 +171,7 @@ nextflow_process { assertAll( { assert process.success }, { assert process.out.reml_results.size() == 1 }, - { assert process.out.reml_results.get(0).get(0).id == "QuantitativeTraitMpheno2" }, + { assert process.out.reml_results.get(0).get(0).id == "QuantitativeTrait" }, { assert snapshot( process.out.reml_results, @@ -131,22 +182,49 @@ nextflow_process { } } - test("homo_sapiens popgen - quantitative phenotype with dense GRM and covariates") { + test("homo_sapiens popgen - dense GRM mpheno defaults to first phenotype when empty") { config "./nextflow.config" when { process { - """ - dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> - def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job - [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + ''' + def sample_ids = (1..150).collect { idx -> "SAMPLE_${idx}" } + def write_float_binary = { filename, values -> + def buffer = java.nio.ByteBuffer.allocate(values.size() * 4).order(java.nio.ByteOrder.LITTLE_ENDIAN) + values.each { value -> buffer.putFloat((value as Number).floatValue()) } + file(filename).bytes = buffer.array() } - input[0] = GAWK_QUANTITATIVE_PHENO.out.output - input[1] = dense_grm - input[2] = GAWK_QUANTITATIVE_COVARIATES.out.output - input[3] = GAWK_CATEGORICAL_COVARIATES.out.output - input[4] = 1 - """ + file('dense.grm.id').text = sample_ids.collect { sid -> "${sid} ${sid}" }.join('\\n') + '\\n' + + def dense_relationships = [] + for (int i = 0; i < sample_ids.size(); i++) { + for (int j = 0; j <= i; j++) { + dense_relationships << Math.exp(-Math.abs(i - j) / 20.0d).floatValue() + } + } + write_float_binary('dense.grm.bin', dense_relationships) + write_float_binary('dense.grm.N.bin', dense_relationships.collect { 2500.0f }) + + file('multi_phenotypes.phe').text = sample_ids.withIndex().collect { sid, idx -> + def trait1 = ((idx % 13) - 6) + ((idx % 4) / 
10.0) + def trait2 = trait1 + ((idx % 5) / 10.0) + "${sid} ${sid} ${trait1} ${trait2}" + }.join('\\n') + '\\n' + + file('quantitative_covariates.txt').text = sample_ids.withIndex().collect { sid, idx -> + "${sid} ${sid} ${((idx % 9) - 4) / 3.0} ${((idx % 7) - 3) / 2.5}" + }.join('\\n') + '\\n' + + file('categorical_covariates.txt').text = sample_ids.withIndex().collect { sid, idx -> + "${sid} ${sid} ${(idx % 3) + 1}" + }.join('\\n') + '\\n' + + input[0] = [[ id:'QuantitativeTraitMphenoDefault' ], file('multi_phenotypes.phe')] + input[1] = [[ id:'dense' ], file('dense.grm.id'), file('dense.grm.bin'), file('dense.grm.N.bin')] + input[2] = [[ id:'covariates_quant' ], file('quantitative_covariates.txt')] + input[3] = [[ id:'covariates_cat' ], file('categorical_covariates.txt')] + input[4] = [] + ''' } } @@ -154,7 +232,7 @@ nextflow_process { assertAll( { assert process.success }, { assert process.out.reml_results.size() == 1 }, - { assert process.out.reml_results.get(0).get(0).id == "QuantitativeTrait" }, + { assert process.out.reml_results.get(0).get(0).id == "QuantitativeTraitMphenoDefault" }, { assert snapshot( process.out.reml_results, @@ -165,31 +243,109 @@ nextflow_process { } } + test("homo_sapiens popgen - fails when mpheno index is out of range") { + config "./nextflow.config" + when { + process { + ''' + def sample_ids = (1..150).collect { idx -> "SAMPLE_${idx}" } + def write_float_binary = { filename, values -> + def buffer = java.nio.ByteBuffer.allocate(values.size() * 4).order(java.nio.ByteOrder.LITTLE_ENDIAN) + values.each { value -> buffer.putFloat((value as Number).floatValue()) } + file(filename).bytes = buffer.array() + } + + file('dense.grm.id').text = sample_ids.collect { sid -> "${sid} ${sid}" }.join('\\n') + '\\n' + + def dense_relationships = [] + for (int i = 0; i < sample_ids.size(); i++) { + for (int j = 0; j <= i; j++) { + dense_relationships << Math.exp(-Math.abs(i - j) / 20.0d).floatValue() + } + } + write_float_binary('dense.grm.bin', 
dense_relationships) + write_float_binary('dense.grm.N.bin', dense_relationships.collect { 2500.0f }) + + file('quantitative_phenotypes.phe').text = sample_ids.withIndex().collect { sid, idx -> + def trait1 = ((idx % 13) - 6) + ((idx % 4) / 10.0) + "${sid} ${sid} ${trait1}" + }.join('\\n') + '\\n' + + file('quantitative_covariates.txt').text = sample_ids.withIndex().collect { sid, idx -> + "${sid} ${sid} ${((idx % 9) - 4) / 3.0} ${((idx % 7) - 3) / 2.5}" + }.join('\\n') + '\\n' + + file('categorical_covariates.txt').text = sample_ids.withIndex().collect { sid, idx -> + "${sid} ${sid} ${(idx % 3) + 1}" + }.join('\\n') + '\\n' + + input[0] = [[ id:'QuantitativeTraitInvalidMpheno' ], file('quantitative_phenotypes.phe')] + input[1] = [[ id:'dense' ], file('dense.grm.id'), file('dense.grm.bin'), file('dense.grm.N.bin')] + input[2] = [[ id:'covariates_quant' ], file('quantitative_covariates.txt')] + input[3] = [[ id:'covariates_cat' ], file('categorical_covariates.txt')] + input[4] = 3 + ''' + } + } + + then { + assertAll( + { assert !process.success }, + { assert process.exitStatus != 0 } + ) + } + } + test("homo_sapiens popgen - quantitative phenotype with dense GRM - stub") { options "-stub" config "./nextflow.config" when { process { - """ - dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> - def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job - [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + ''' + def sample_ids = (1..150).collect { idx -> "SAMPLE_${idx}" } + def write_float_binary = { filename, values -> + def buffer = java.nio.ByteBuffer.allocate(values.size() * 4).order(java.nio.ByteOrder.LITTLE_ENDIAN) + values.each { value -> buffer.putFloat((value as Number).floatValue()) } + file(filename).bytes = buffer.array() + } + + file('dense.grm.id').text = sample_ids.collect { sid -> "${sid} ${sid}" }.join('\\n') + '\\n' + + def dense_relationships = [] + for (int i = 0; i < sample_ids.size(); i++) { + 
for (int j = 0; j <= i; j++) { + dense_relationships << Math.exp(-Math.abs(i - j) / 20.0d).floatValue() + } } + write_float_binary('dense.grm.bin', dense_relationships) + write_float_binary('dense.grm.N.bin', dense_relationships.collect { 2500.0f }) - input[0] = GAWK_QUANTITATIVE_PHENO.out.output - input[1] = dense_grm + file('quantitative_phenotypes.phe').text = sample_ids.withIndex().collect { sid, idx -> + def trait1 = ((idx % 13) - 6) + ((idx % 4) / 10.0) + "${sid} ${sid} ${trait1}" + }.join('\\n') + '\\n' + + input[0] = [[ id:'QuantitativeTrait' ], file('quantitative_phenotypes.phe')] + input[1] = [[ id:'dense' ], file('dense.grm.id'), file('dense.grm.bin'), file('dense.grm.N.bin')] input[2] = [[ id:'covariates_quant' ], []] input[3] = [[ id:'covariates_cat' ], []] input[4] = 1 - """ + ''' } } then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert process.out.reml_results.size() == 1 }, + { assert process.out.reml_results.get(0).get(0).id == "QuantitativeTrait" }, + { + assert snapshot( + process.out.reml_results, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } ) } } diff --git a/modules/nf-core/gcta/reml/tests/main.nf.test.snap b/modules/nf-core/gcta/reml/tests/main.nf.test.snap index cd4fd313db70..90d9bc543baa 100644 --- a/modules/nf-core/gcta/reml/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/reml/tests/main.nf.test.snap @@ -1,30 +1,67 @@ { "homo_sapiens popgen - quantitative phenotype with dense GRM - stub": { "content": [ + [ + [ + { + "id": "QuantitativeTrait" + }, + "QuantitativeTrait.hsq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], { - "0": [ - [ - { - "id": "QuantitativeTrait" - }, - "QuantitativeTrait.hsq:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ + "versions_gcta": [ [ "GCTA_REML", "gcta", "1.94.1" ] - ], - "reml_results": [ + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-21T00:39:12.308775079" + }, + 
"homo_sapiens popgen - dense GRM mpheno defaults to first phenotype when empty": { + "content": [ + [ + [ + { + "id": "QuantitativeTraitMphenoDefault" + }, + "QuantitativeTraitMphenoDefault.hsq:md5,3a8396598d6245a65dd169e1517c8c4c" + ] + ], + { + "versions_gcta": [ [ - { - "id": "QuantitativeTrait" - }, - "QuantitativeTrait.hsq:md5,d41d8cd98f00b204e9800998ecf8427e" + "GCTA_REML", + "gcta", + "1.94.1" ] - ], + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-21T00:38:50.94958986" + }, + "homo_sapiens popgen - quantitative phenotype with dense GRM and no covariates": { + "content": [ + [ + [ + { + "id": "QuantitativeTrait" + }, + "QuantitativeTrait.hsq:md5,842502be482083726be1385c9f14fc87" + ] + ], + { "versions_gcta": [ [ "GCTA_REML", @@ -38,7 +75,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-13T14:14:45.245259136" + "timestamp": "2026-03-21T00:38:35.808675394" }, "homo_sapiens popgen - quantitative phenotype with dense GRM and covariates": { "content": [ @@ -47,7 +84,7 @@ { "id": "QuantitativeTrait" }, - "QuantitativeTrait.hsq:md5,a1a3eb919cf7aec392435b4bf36ae788" + "QuantitativeTrait.hsq:md5,3a8396598d6245a65dd169e1517c8c4c" ] ], { @@ -64,7 +101,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-18T19:01:39.933270203" + "timestamp": "2026-03-21T00:38:24.689275177" }, "homo_sapiens popgen - dense GRM with mpheno selection": { "content": [ @@ -73,7 +110,7 @@ { "id": "QuantitativeTraitMpheno2" }, - "QuantitativeTraitMpheno2.hsq:md5,47a16182353f1c15a9b1408ee02bdcdc" + "QuantitativeTraitMpheno2.hsq:md5,2e6e9fa7a62e7439a2b9bc0b7e2f2e4f" ] ], { @@ -90,6 +127,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-20T21:28:38.137634953" + "timestamp": "2026-03-21T00:38:11.031642957" } } \ No newline at end of file From b6396d7a8e0e1fa4aa273829369ccd39e8924ee0 Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Sat, 21 Mar 2026 18:47:05 +0800 Subject: [PATCH 
3/5] test(gcta/reml): generate GRM inputs in setup --- .../gcta/reml/tests/helpers/makegrm/main.nf | 36 ++ modules/nf-core/gcta/reml/tests/main.nf.test | 307 ++++++------------ .../nf-core/gcta/reml/tests/main.nf.test.snap | 16 +- 3 files changed, 149 insertions(+), 210 deletions(-) create mode 100644 modules/nf-core/gcta/reml/tests/helpers/makegrm/main.nf diff --git a/modules/nf-core/gcta/reml/tests/helpers/makegrm/main.nf b/modules/nf-core/gcta/reml/tests/helpers/makegrm/main.nf new file mode 100644 index 000000000000..7127d710af83 --- /dev/null +++ b/modules/nf-core/gcta/reml/tests/helpers/makegrm/main.nf @@ -0,0 +1,36 @@ +process GCTA_MAKEGRM_HELPER { + tag "${meta.id}" + label "process_medium" + conda "${projectDir}/modules/nf-core/gcta/reml/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' : + 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}" + + input: + tuple val(meta), path(bed), path(bim), path(fam) + tuple val(meta2), path(extract_file) + + output: + tuple val(meta), path("${meta.id}.grm.id"), path("${meta.id}.grm.bin"), path("${meta.id}.grm.N.bin"), emit: grm_files + + script: + def extract_param = extract_file ? 
"--extract ${extract_file}" : "" + def bfile_prefix = bed.baseName + """ + set -euo pipefail + + gcta \\ + --bfile "${bfile_prefix}" \\ + ${extract_param} \\ + --make-grm \\ + --out "${meta.id}" \\ + --thread-num ${task.cpus} + """ + + stub: + """ + touch "${meta.id}.grm.id" + touch "${meta.id}.grm.bin" + touch "${meta.id}.grm.N.bin" + """ +} diff --git a/modules/nf-core/gcta/reml/tests/main.nf.test b/modules/nf-core/gcta/reml/tests/main.nf.test index 0fbdc3bb24d5..dc749b6742f8 100644 --- a/modules/nf-core/gcta/reml/tests/main.nf.test +++ b/modules/nf-core/gcta/reml/tests/main.nf.test @@ -8,50 +8,93 @@ nextflow_process { tag "modules_nfcore" tag "gcta" tag "gcta/reml" + tag "tests/helpers/makegrm" + tag "gawk" - test("homo_sapiens popgen - dense GRM with mpheno selection") { - config "./nextflow.config" - when { + setup { + run("GAWK", alias: "GAWK_QUANTITATIVE_PHENOTYPE") { + script "../../../gawk/main.nf" process { - ''' - def sample_ids = (1..150).collect { idx -> "SAMPLE_${idx}" } - def write_float_binary = { filename, values -> - def buffer = java.nio.ByteBuffer.allocate(values.size() * 4).order(java.nio.ByteOrder.LITTLE_ENDIAN) - values.each { value -> buffer.putFloat((value as Number).floatValue()) } - file(filename).bytes = buffer.array() - } - - file('dense.grm.id').text = sample_ids.collect { sid -> "${sid} ${sid}" }.join('\\n') + '\\n' + """ + input[0] = [ + [ id:'QuantitativeTrait' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + input[1] = Channel.of('{ print \$1, \$2, \$3 }').collectFile(name:'quantitative_phenotypes.awk') + input[2] = false + """ + } + } - def dense_relationships = [] - for (int i = 0; i < sample_ids.size(); i++) { - for (int j = 0; j <= i; j++) { - dense_relationships << Math.exp(-Math.abs(i - j) / 20.0d).floatValue() - } - } - write_float_binary('dense.grm.bin', dense_relationships) - write_float_binary('dense.grm.N.bin', 
dense_relationships.collect { 2500.0f }) + run("GAWK", alias: "GAWK_MULTI_PHENOTYPES") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'QuantitativeTraitMpheno' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + input[1] = Channel.of('{ print \$1, \$2, \$3, (\$3 * 0.5) + ((NR % 7) / 10.0) }').collectFile(name:'multi_phenotypes.awk') + input[2] = false + """ + } + } - file('multi_phenotypes.phe').text = sample_ids.withIndex().collect { sid, idx -> - def trait1 = ((idx % 13) - 6) + ((idx % 4) / 10.0) - def trait2 = trait1 + ((idx % 5) / 10.0) - "${sid} ${sid} ${trait1} ${trait2}" - }.join('\\n') + '\\n' + run("GAWK", alias: "GAWK_QUANTITATIVE_COVARIATES") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'covariates_quant' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$4, \$5 }').collectFile(name:'quantitative_covariates.awk') + input[2] = false + """ + } + } - file('quantitative_covariates.txt').text = sample_ids.withIndex().collect { sid, idx -> - "${sid} ${sid} ${((idx % 9) - 4) / 3.0} ${((idx % 7) - 3) / 2.5}" - }.join('\\n') + '\\n' + run("GAWK", alias: "GAWK_CATEGORICAL_COVARIATES") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'covariates_cat' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3 }').collectFile(name:'categorical_covariates.awk') + input[2] = false + """ + } + } - file('categorical_covariates.txt').text = sample_ids.withIndex().collect { sid, idx -> - "${sid} ${sid} ${(idx % 3) + 1}" - }.join('\\n') + '\\n' + run("GCTA_MAKEGRM_HELPER", alias: "GCTA_MAKEGRM_DENSE") { + script 
"../tests/helpers/makegrm/main.nf" + process { + """ + input[0] = [ + [ id:'plink_simulated_dense.part_1_1' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + input[1] = [[ id:'all_variants' ], []] + """ + } + } + } - input[0] = [[ id:'QuantitativeTraitMpheno2' ], file('multi_phenotypes.phe')] - input[1] = [[ id:'dense' ], file('dense.grm.id'), file('dense.grm.bin'), file('dense.grm.N.bin')] - input[2] = [[ id:'covariates_quant' ], file('quantitative_covariates.txt')] - input[3] = [[ id:'covariates_cat' ], file('categorical_covariates.txt')] + test("homo_sapiens popgen - dense GRM with mpheno selection") { + config "./nextflow.config" + when { + process { + """ + input[0] = GAWK_MULTI_PHENOTYPES.out.output.map { meta, pheno -> [[ id:'QuantitativeTraitMpheno2' ], pheno] } + input[1] = GCTA_MAKEGRM_DENSE.out.grm_files + input[2] = GAWK_QUANTITATIVE_COVARIATES.out.output + input[3] = GAWK_CATEGORICAL_COVARIATES.out.output input[4] = 2 - ''' + """ } } @@ -74,44 +117,13 @@ nextflow_process { config "./nextflow.config" when { process { - ''' - def sample_ids = (1..150).collect { idx -> "SAMPLE_${idx}" } - def write_float_binary = { filename, values -> - def buffer = java.nio.ByteBuffer.allocate(values.size() * 4).order(java.nio.ByteOrder.LITTLE_ENDIAN) - values.each { value -> buffer.putFloat((value as Number).floatValue()) } - file(filename).bytes = buffer.array() - } - - file('dense.grm.id').text = sample_ids.collect { sid -> "${sid} ${sid}" }.join('\\n') + '\\n' - - def dense_relationships = [] - for (int i = 0; i < sample_ids.size(); i++) { - for (int j = 0; j <= i; j++) { - dense_relationships << Math.exp(-Math.abs(i - j) / 20.0d).floatValue() - } - } - 
write_float_binary('dense.grm.bin', dense_relationships) - write_float_binary('dense.grm.N.bin', dense_relationships.collect { 2500.0f }) - - file('quantitative_phenotypes.phe').text = sample_ids.withIndex().collect { sid, idx -> - def trait1 = ((idx % 13) - 6) + ((idx % 4) / 10.0) - "${sid} ${sid} ${trait1}" - }.join('\\n') + '\\n' - - file('quantitative_covariates.txt').text = sample_ids.withIndex().collect { sid, idx -> - "${sid} ${sid} ${((idx % 9) - 4) / 3.0} ${((idx % 7) - 3) / 2.5}" - }.join('\\n') + '\\n' - - file('categorical_covariates.txt').text = sample_ids.withIndex().collect { sid, idx -> - "${sid} ${sid} ${(idx % 3) + 1}" - }.join('\\n') + '\\n' - - input[0] = [[ id:'QuantitativeTrait' ], file('quantitative_phenotypes.phe')] - input[1] = [[ id:'dense' ], file('dense.grm.id'), file('dense.grm.bin'), file('dense.grm.N.bin')] - input[2] = [[ id:'covariates_quant' ], file('quantitative_covariates.txt')] - input[3] = [[ id:'covariates_cat' ], file('categorical_covariates.txt')] + """ + input[0] = GAWK_QUANTITATIVE_PHENOTYPE.out.output + input[1] = GCTA_MAKEGRM_DENSE.out.grm_files + input[2] = GAWK_QUANTITATIVE_COVARIATES.out.output + input[3] = GAWK_CATEGORICAL_COVARIATES.out.output input[4] = 1 - ''' + """ } } @@ -134,36 +146,13 @@ nextflow_process { config "./nextflow.config" when { process { - ''' - def sample_ids = (1..150).collect { idx -> "SAMPLE_${idx}" } - def write_float_binary = { filename, values -> - def buffer = java.nio.ByteBuffer.allocate(values.size() * 4).order(java.nio.ByteOrder.LITTLE_ENDIAN) - values.each { value -> buffer.putFloat((value as Number).floatValue()) } - file(filename).bytes = buffer.array() - } - - file('dense.grm.id').text = sample_ids.collect { sid -> "${sid} ${sid}" }.join('\\n') + '\\n' - - def dense_relationships = [] - for (int i = 0; i < sample_ids.size(); i++) { - for (int j = 0; j <= i; j++) { - dense_relationships << Math.exp(-Math.abs(i - j) / 20.0d).floatValue() - } - } - write_float_binary('dense.grm.bin', 
dense_relationships) - write_float_binary('dense.grm.N.bin', dense_relationships.collect { 2500.0f }) - - file('quantitative_phenotypes.phe').text = sample_ids.withIndex().collect { sid, idx -> - def trait1 = ((idx % 13) - 6) + ((idx % 4) / 10.0) - "${sid} ${sid} ${trait1}" - }.join('\\n') + '\\n' - - input[0] = [[ id:'QuantitativeTrait' ], file('quantitative_phenotypes.phe')] - input[1] = [[ id:'dense' ], file('dense.grm.id'), file('dense.grm.bin'), file('dense.grm.N.bin')] + """ + input[0] = GAWK_QUANTITATIVE_PHENOTYPE.out.output + input[1] = GCTA_MAKEGRM_DENSE.out.grm_files input[2] = [[ id:'covariates_quant' ], []] input[3] = [[ id:'covariates_cat' ], []] input[4] = 1 - ''' + """ } } @@ -186,45 +175,13 @@ nextflow_process { config "./nextflow.config" when { process { - ''' - def sample_ids = (1..150).collect { idx -> "SAMPLE_${idx}" } - def write_float_binary = { filename, values -> - def buffer = java.nio.ByteBuffer.allocate(values.size() * 4).order(java.nio.ByteOrder.LITTLE_ENDIAN) - values.each { value -> buffer.putFloat((value as Number).floatValue()) } - file(filename).bytes = buffer.array() - } - - file('dense.grm.id').text = sample_ids.collect { sid -> "${sid} ${sid}" }.join('\\n') + '\\n' - - def dense_relationships = [] - for (int i = 0; i < sample_ids.size(); i++) { - for (int j = 0; j <= i; j++) { - dense_relationships << Math.exp(-Math.abs(i - j) / 20.0d).floatValue() - } - } - write_float_binary('dense.grm.bin', dense_relationships) - write_float_binary('dense.grm.N.bin', dense_relationships.collect { 2500.0f }) - - file('multi_phenotypes.phe').text = sample_ids.withIndex().collect { sid, idx -> - def trait1 = ((idx % 13) - 6) + ((idx % 4) / 10.0) - def trait2 = trait1 + ((idx % 5) / 10.0) - "${sid} ${sid} ${trait1} ${trait2}" - }.join('\\n') + '\\n' - - file('quantitative_covariates.txt').text = sample_ids.withIndex().collect { sid, idx -> - "${sid} ${sid} ${((idx % 9) - 4) / 3.0} ${((idx % 7) - 3) / 2.5}" - }.join('\\n') + '\\n' - - 
file('categorical_covariates.txt').text = sample_ids.withIndex().collect { sid, idx -> - "${sid} ${sid} ${(idx % 3) + 1}" - }.join('\\n') + '\\n' - - input[0] = [[ id:'QuantitativeTraitMphenoDefault' ], file('multi_phenotypes.phe')] - input[1] = [[ id:'dense' ], file('dense.grm.id'), file('dense.grm.bin'), file('dense.grm.N.bin')] - input[2] = [[ id:'covariates_quant' ], file('quantitative_covariates.txt')] - input[3] = [[ id:'covariates_cat' ], file('categorical_covariates.txt')] + """ + input[0] = GAWK_MULTI_PHENOTYPES.out.output.map { meta, pheno -> [[ id:'QuantitativeTraitMphenoDefault' ], pheno] } + input[1] = GCTA_MAKEGRM_DENSE.out.grm_files + input[2] = GAWK_QUANTITATIVE_COVARIATES.out.output + input[3] = GAWK_CATEGORICAL_COVARIATES.out.output input[4] = [] - ''' + """ } } @@ -247,44 +204,13 @@ nextflow_process { config "./nextflow.config" when { process { - ''' - def sample_ids = (1..150).collect { idx -> "SAMPLE_${idx}" } - def write_float_binary = { filename, values -> - def buffer = java.nio.ByteBuffer.allocate(values.size() * 4).order(java.nio.ByteOrder.LITTLE_ENDIAN) - values.each { value -> buffer.putFloat((value as Number).floatValue()) } - file(filename).bytes = buffer.array() - } - - file('dense.grm.id').text = sample_ids.collect { sid -> "${sid} ${sid}" }.join('\\n') + '\\n' - - def dense_relationships = [] - for (int i = 0; i < sample_ids.size(); i++) { - for (int j = 0; j <= i; j++) { - dense_relationships << Math.exp(-Math.abs(i - j) / 20.0d).floatValue() - } - } - write_float_binary('dense.grm.bin', dense_relationships) - write_float_binary('dense.grm.N.bin', dense_relationships.collect { 2500.0f }) - - file('quantitative_phenotypes.phe').text = sample_ids.withIndex().collect { sid, idx -> - def trait1 = ((idx % 13) - 6) + ((idx % 4) / 10.0) - "${sid} ${sid} ${trait1}" - }.join('\\n') + '\\n' - - file('quantitative_covariates.txt').text = sample_ids.withIndex().collect { sid, idx -> - "${sid} ${sid} ${((idx % 9) - 4) / 3.0} ${((idx % 7) - 3) / 
2.5}" - }.join('\\n') + '\\n' - - file('categorical_covariates.txt').text = sample_ids.withIndex().collect { sid, idx -> - "${sid} ${sid} ${(idx % 3) + 1}" - }.join('\\n') + '\\n' - - input[0] = [[ id:'QuantitativeTraitInvalidMpheno' ], file('quantitative_phenotypes.phe')] - input[1] = [[ id:'dense' ], file('dense.grm.id'), file('dense.grm.bin'), file('dense.grm.N.bin')] - input[2] = [[ id:'covariates_quant' ], file('quantitative_covariates.txt')] - input[3] = [[ id:'covariates_cat' ], file('categorical_covariates.txt')] + """ + input[0] = GAWK_MULTI_PHENOTYPES.out.output.map { meta, pheno -> [[ id:'QuantitativeTraitInvalidMpheno' ], pheno] } + input[1] = GCTA_MAKEGRM_DENSE.out.grm_files + input[2] = GAWK_QUANTITATIVE_COVARIATES.out.output + input[3] = GAWK_CATEGORICAL_COVARIATES.out.output input[4] = 3 - ''' + """ } } @@ -302,36 +228,13 @@ nextflow_process { when { process { - ''' - def sample_ids = (1..150).collect { idx -> "SAMPLE_${idx}" } - def write_float_binary = { filename, values -> - def buffer = java.nio.ByteBuffer.allocate(values.size() * 4).order(java.nio.ByteOrder.LITTLE_ENDIAN) - values.each { value -> buffer.putFloat((value as Number).floatValue()) } - file(filename).bytes = buffer.array() - } - - file('dense.grm.id').text = sample_ids.collect { sid -> "${sid} ${sid}" }.join('\\n') + '\\n' - - def dense_relationships = [] - for (int i = 0; i < sample_ids.size(); i++) { - for (int j = 0; j <= i; j++) { - dense_relationships << Math.exp(-Math.abs(i - j) / 20.0d).floatValue() - } - } - write_float_binary('dense.grm.bin', dense_relationships) - write_float_binary('dense.grm.N.bin', dense_relationships.collect { 2500.0f }) - - file('quantitative_phenotypes.phe').text = sample_ids.withIndex().collect { sid, idx -> - def trait1 = ((idx % 13) - 6) + ((idx % 4) / 10.0) - "${sid} ${sid} ${trait1}" - }.join('\\n') + '\\n' - - input[0] = [[ id:'QuantitativeTrait' ], file('quantitative_phenotypes.phe')] - input[1] = [[ id:'dense' ], file('dense.grm.id'), 
file('dense.grm.bin'), file('dense.grm.N.bin')] + """ + input[0] = GAWK_QUANTITATIVE_PHENOTYPE.out.output + input[1] = GCTA_MAKEGRM_DENSE.out.grm_files input[2] = [[ id:'covariates_quant' ], []] input[3] = [[ id:'covariates_cat' ], []] input[4] = 1 - ''' + """ } } diff --git a/modules/nf-core/gcta/reml/tests/main.nf.test.snap b/modules/nf-core/gcta/reml/tests/main.nf.test.snap index 90d9bc543baa..cacd5b4c1355 100644 --- a/modules/nf-core/gcta/reml/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/reml/tests/main.nf.test.snap @@ -32,7 +32,7 @@ { "id": "QuantitativeTraitMphenoDefault" }, - "QuantitativeTraitMphenoDefault.hsq:md5,3a8396598d6245a65dd169e1517c8c4c" + "QuantitativeTraitMphenoDefault.hsq:md5,17d5e79e461b582b0aaba2a40666c8f7" ] ], { @@ -49,7 +49,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-21T00:38:50.94958986" + "timestamp": "2026-03-21T18:45:48.891436292" }, "homo_sapiens popgen - quantitative phenotype with dense GRM and no covariates": { "content": [ @@ -58,7 +58,7 @@ { "id": "QuantitativeTrait" }, - "QuantitativeTrait.hsq:md5,842502be482083726be1385c9f14fc87" + "QuantitativeTrait.hsq:md5,51c1328c8feb6d53f3984cd58324fed7" ] ], { @@ -75,7 +75,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-21T00:38:35.808675394" + "timestamp": "2026-03-21T18:45:38.965175332" }, "homo_sapiens popgen - quantitative phenotype with dense GRM and covariates": { "content": [ @@ -84,7 +84,7 @@ { "id": "QuantitativeTrait" }, - "QuantitativeTrait.hsq:md5,3a8396598d6245a65dd169e1517c8c4c" + "QuantitativeTrait.hsq:md5,17d5e79e461b582b0aaba2a40666c8f7" ] ], { @@ -101,7 +101,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-21T00:38:24.689275177" + "timestamp": "2026-03-21T18:45:28.626250983" }, "homo_sapiens popgen - dense GRM with mpheno selection": { "content": [ @@ -110,7 +110,7 @@ { "id": "QuantitativeTraitMpheno2" }, - "QuantitativeTraitMpheno2.hsq:md5,2e6e9fa7a62e7439a2b9bc0b7e2f2e4f" + 
"QuantitativeTraitMpheno2.hsq:md5,0a29048e72305f462889481b2dfb94db" ] ], { @@ -127,6 +127,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.4" }, - "timestamp": "2026-03-21T00:38:11.031642957" + "timestamp": "2026-03-21T18:45:19.433138352" } } \ No newline at end of file From 072f5d4147cf2a3cc1bd04b3abf84a0bb8a1d55c Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Sat, 21 Mar 2026 20:48:10 +0800 Subject: [PATCH 4/5] tests: switch reml setup to makegrmpart --- .../gcta/reml/tests/helpers/makegrm/main.nf | 36 ---------- modules/nf-core/gcta/reml/tests/main.nf.test | 65 +++++++++++++++---- 2 files changed, 52 insertions(+), 49 deletions(-) delete mode 100644 modules/nf-core/gcta/reml/tests/helpers/makegrm/main.nf diff --git a/modules/nf-core/gcta/reml/tests/helpers/makegrm/main.nf b/modules/nf-core/gcta/reml/tests/helpers/makegrm/main.nf deleted file mode 100644 index 7127d710af83..000000000000 --- a/modules/nf-core/gcta/reml/tests/helpers/makegrm/main.nf +++ /dev/null @@ -1,36 +0,0 @@ -process GCTA_MAKEGRM_HELPER { - tag "${meta.id}" - label "process_medium" - conda "${projectDir}/modules/nf-core/gcta/reml/environment.yml" - container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' : - 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}" - - input: - tuple val(meta), path(bed), path(bim), path(fam) - tuple val(meta2), path(extract_file) - - output: - tuple val(meta), path("${meta.id}.grm.id"), path("${meta.id}.grm.bin"), path("${meta.id}.grm.N.bin"), emit: grm_files - - script: - def extract_param = extract_file ? 
"--extract ${extract_file}" : "" - def bfile_prefix = bed.baseName - """ - set -euo pipefail - - gcta \\ - --bfile "${bfile_prefix}" \\ - ${extract_param} \\ - --make-grm \\ - --out "${meta.id}" \\ - --thread-num ${task.cpus} - """ - - stub: - """ - touch "${meta.id}.grm.id" - touch "${meta.id}.grm.bin" - touch "${meta.id}.grm.N.bin" - """ -} diff --git a/modules/nf-core/gcta/reml/tests/main.nf.test b/modules/nf-core/gcta/reml/tests/main.nf.test index dc749b6742f8..fa8e031c366d 100644 --- a/modules/nf-core/gcta/reml/tests/main.nf.test +++ b/modules/nf-core/gcta/reml/tests/main.nf.test @@ -8,7 +8,7 @@ nextflow_process { tag "modules_nfcore" tag "gcta" tag "gcta/reml" - tag "tests/helpers/makegrm" + tag "gcta/makegrmpart" tag "gawk" setup { @@ -68,15 +68,24 @@ nextflow_process { } } - run("GCTA_MAKEGRM_HELPER", alias: "GCTA_MAKEGRM_DENSE") { - script "../tests/helpers/makegrm/main.nf" + run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_DENSE") { + script "../../makegrmpart/main.nf" process { """ + file('plink_simulated.mbfile').text = 'plink_simulated\\n' + input[0] = [ - [ id:'plink_simulated_dense.part_1_1' ], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + [ id:'plink_simulated_dense', part_gcta_job:1, nparts_gcta:1 ], + file('plink_simulated.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] ] input[1] = [[ id:'all_variants' ], []] """ @@ 
-89,8 +98,13 @@ nextflow_process { when { process { """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + input[0] = GAWK_MULTI_PHENOTYPES.out.output.map { meta, pheno -> [[ id:'QuantitativeTraitMpheno2' ], pheno] } - input[1] = GCTA_MAKEGRM_DENSE.out.grm_files + input[1] = dense_grm input[2] = GAWK_QUANTITATIVE_COVARIATES.out.output input[3] = GAWK_CATEGORICAL_COVARIATES.out.output input[4] = 2 @@ -118,8 +132,13 @@ nextflow_process { when { process { """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + input[0] = GAWK_QUANTITATIVE_PHENOTYPE.out.output - input[1] = GCTA_MAKEGRM_DENSE.out.grm_files + input[1] = dense_grm input[2] = GAWK_QUANTITATIVE_COVARIATES.out.output input[3] = GAWK_CATEGORICAL_COVARIATES.out.output input[4] = 1 @@ -147,8 +166,13 @@ nextflow_process { when { process { """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + input[0] = GAWK_QUANTITATIVE_PHENOTYPE.out.output - input[1] = GCTA_MAKEGRM_DENSE.out.grm_files + input[1] = dense_grm input[2] = [[ id:'covariates_quant' ], []] input[3] = [[ id:'covariates_cat' ], []] input[4] = 1 @@ -176,8 +200,13 @@ nextflow_process { when { process { """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + input[0] = GAWK_MULTI_PHENOTYPES.out.output.map { meta, pheno -> [[ id:'QuantitativeTraitMphenoDefault' ], pheno] } - 
input[1] = GCTA_MAKEGRM_DENSE.out.grm_files + input[1] = dense_grm input[2] = GAWK_QUANTITATIVE_COVARIATES.out.output input[3] = GAWK_CATEGORICAL_COVARIATES.out.output input[4] = [] @@ -205,8 +234,13 @@ nextflow_process { when { process { """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + input[0] = GAWK_MULTI_PHENOTYPES.out.output.map { meta, pheno -> [[ id:'QuantitativeTraitInvalidMpheno' ], pheno] } - input[1] = GCTA_MAKEGRM_DENSE.out.grm_files + input[1] = dense_grm input[2] = GAWK_QUANTITATIVE_COVARIATES.out.output input[3] = GAWK_CATEGORICAL_COVARIATES.out.output input[4] = 3 @@ -229,8 +263,13 @@ nextflow_process { when { process { """ + dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> + def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job + [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + } + input[0] = GAWK_QUANTITATIVE_PHENOTYPE.out.output - input[1] = GCTA_MAKEGRM_DENSE.out.grm_files + input[1] = dense_grm input[2] = [[ id:'covariates_quant' ], []] input[3] = [[ id:'covariates_cat' ], []] input[4] = 1 From 0a56acec1452c68942ec22155a2bd261851e88de Mon Sep 17 00:00:00 2001 From: lyh970817 Date: Sat, 16 May 2026 08:36:40 +0800 Subject: [PATCH 5/5] Update reml dense GRM contract --- modules/nf-core/gcta/makegrm/environment.yml | 7 + modules/nf-core/gcta/makegrm/main.nf | 42 ++++++ modules/nf-core/gcta/makegrm/meta.yml | 91 +++++++++++ .../nf-core/gcta/makegrm/tests/main.nf.test | 142 ++++++++++++++++++ .../gcta/makegrm/tests/main.nf.test.snap | 111 ++++++++++++++ .../gcta/makegrm/tests/nextflow.config | 3 + modules/nf-core/gcta/reml/main.nf | 17 ++- modules/nf-core/gcta/reml/meta.yml | 47 +++--- modules/nf-core/gcta/reml/tests/main.nf.test | 109 +++++++------- .../nf-core/gcta/reml/tests/main.nf.test.snap | 26 ++++ 10 
files changed, 513 insertions(+), 82 deletions(-) create mode 100644 modules/nf-core/gcta/makegrm/environment.yml create mode 100644 modules/nf-core/gcta/makegrm/main.nf create mode 100644 modules/nf-core/gcta/makegrm/meta.yml create mode 100644 modules/nf-core/gcta/makegrm/tests/main.nf.test create mode 100644 modules/nf-core/gcta/makegrm/tests/main.nf.test.snap create mode 100644 modules/nf-core/gcta/makegrm/tests/nextflow.config diff --git a/modules/nf-core/gcta/makegrm/environment.yml b/modules/nf-core/gcta/makegrm/environment.yml new file mode 100644 index 000000000000..3e22ea7b9f20 --- /dev/null +++ b/modules/nf-core/gcta/makegrm/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gcta=1.94.1 diff --git a/modules/nf-core/gcta/makegrm/main.nf b/modules/nf-core/gcta/makegrm/main.nf new file mode 100644 index 000000000000..0bb78639e361 --- /dev/null +++ b/modules/nf-core/gcta/makegrm/main.nf @@ -0,0 +1,42 @@ +process GCTA_MAKEGRM { + tag "${meta.id}" + label 'process_medium' + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46b0d05f0daa47561d87d2a9cac5e51edc2c78e26f1bbab439c688386241a274/data' + : 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}" + + input: + tuple val(meta), path(mfile), path(bed_pgen), path(bim_pvar), path(fam_psam) + + output: + tuple val(meta), path("*.grm.*"), emit: grm_files + tuple val("${task.process}"), val("gcta"), eval("gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'"), emit: versions_gcta, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def extra_args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def genotype_files = bed_pgen instanceof List ? bed_pgen : [bed_pgen] + def genotype_extension = genotype_files[0].name.tokenize('.').last() + def multi_file_flag = genotype_extension == 'pgen' ? '--mpfile' : '--mbfile' + + """ + + gcta \\ + ${multi_file_flag} ${mfile} \\ + --make-grm \\ + --thread-num ${task.cpus} \\ + --out ${prefix} ${extra_args} + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.grm.id + touch ${prefix}.grm.bin + touch ${prefix}.grm.N.bin + """ +} diff --git a/modules/nf-core/gcta/makegrm/meta.yml b/modules/nf-core/gcta/makegrm/meta.yml new file mode 100644 index 000000000000..0c813dadada6 --- /dev/null +++ b/modules/nf-core/gcta/makegrm/meta.yml @@ -0,0 +1,91 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "gcta_makegrm" +description: Compute a whole dense GRM with GCTA +keywords: + - gcta + - genome-wide complex trait analysis + - grm + - genetic relationship matrix + - genetics +tools: + - "gcta": + description: "GCTA is a tool for genome-wide complex trait analysis." 
+ homepage: "https://yanglab.westlake.edu.cn/software/gcta/" + documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf" + tool_dev_url: "https://github.com/jianyangqt/gcta" + licence: + - "GPL-3.0-only" + identifier: biotools:gcta + +input: + - - meta: + type: map + description: | + Groovy Map containing GRM sample metadata + e.g. `[ id:'gcta_grm' ]` + - mfile: + type: file + description: GCTA multi-input manifest consumed by `--mbfile` or + `--mpfile` + pattern: "*.{mbfile,mpfile,txt}" + ontologies: + - edam: "http://edamontology.org/format_2330" + - bed_pgen: + type: file + description: Collection of PLINK primary genotype files referenced by the + multi-input manifest + pattern: "*.{bed,pgen}" + ontologies: + - edam: "http://edamontology.org/format_3003" + - bim_pvar: + type: file + description: Collection of PLINK variant metadata files referenced by the + multi-input manifest + pattern: "*.{bim,pvar}" + ontologies: [] + - fam_psam: + type: file + description: Collection of PLINK sample metadata files referenced by the + multi-input manifest + pattern: "*.{fam,psam}" + ontologies: [] + +output: + grm_files: + - - meta: + type: map + description: | + Groovy Map containing GRM sample metadata + e.g. 
`[ id:'gcta_grm' ]` + - "*.grm.*": + type: file + description: Dense GRM sidecar files + pattern: "*.grm.{id,bin,N.bin}" + ontologies: [] + versions_gcta: + - - ${task.process}: + type: string + description: The process the version was collected from + - gcta: + type: string + description: The tool name + - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'": + type: eval + description: The command used to generate the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - gcta: + type: string + description: The tool name + - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'": + type: eval + description: The command used to generate the version of the tool + +authors: + - "@lyh970817" +maintainers: + - "@lyh970817" diff --git a/modules/nf-core/gcta/makegrm/tests/main.nf.test b/modules/nf-core/gcta/makegrm/tests/main.nf.test new file mode 100644 index 000000000000..e5c63233678b --- /dev/null +++ b/modules/nf-core/gcta/makegrm/tests/main.nf.test @@ -0,0 +1,142 @@ +nextflow_process { + + name "Test Process GCTA_MAKEGRM" + script "../main.nf" + process "GCTA_MAKEGRM" + + tag "modules" + tag "modules_nfcore" + tag "gcta" + tag "gcta/makegrm" + + test("homo_sapiens popgen - plink2") { + when { + process { + """ + file('gcta_grm.mpfile').text = 'plink_simulated plink_simulated.pgen plink_simulated.psam plink_simulated.pvar\\n' + + input[0] = [ + [ id:'gcta_grm' ], + file('gcta_grm.mpfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pgen', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pvar', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.psam', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
process.out.grm_files.size() == 1 }, + { assert process.out.grm_files.get(0).get(0).id == 'gcta_grm' }, + { assert process.out.grm_files.get(0).get(0).keySet() == ['id'] as Set }, + { assert process.out.grm_files.get(0).get(1).size() == 3 }, + { + assert process.out.grm_files.get(0).get(1).collect { file(it).name }.toSet() == [ + 'gcta_grm.grm.id', + 'gcta_grm.grm.bin', + 'gcta_grm.grm.N.bin' + ] as Set + }, + { assert file(path(process.out.grm_files.get(0).get(1)[0]).parent.toString() + '/.command.sh').text.contains('--make-grm') }, + { assert file(path(process.out.grm_files.get(0).get(1)[0]).parent.toString() + '/.command.sh').text.contains('--mpfile') }, + { + assert snapshot( + process.out.grm_files, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - plink1") { + when { + process { + """ + file('gcta_grm.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'gcta_grm_bed' ], + file('gcta_grm.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.grm_files.size() == 1 }, + { assert process.out.grm_files.get(0).get(0).id == 'gcta_grm_bed' }, + { assert process.out.grm_files.get(0).get(0).keySet() == ['id'] as Set }, + { assert process.out.grm_files.get(0).get(1).size() == 3 }, + { + assert process.out.grm_files.get(0).get(1).collect { file(it).name }.toSet() == [ + 'gcta_grm_bed.grm.id', + 'gcta_grm_bed.grm.bin', + 'gcta_grm_bed.grm.N.bin' + ] as Set + }, + { assert file(path(process.out.grm_files.get(0).get(1)[0]).parent.toString() + 
'/.command.sh').text.contains('--make-grm') }, + { assert file(path(process.out.grm_files.get(0).get(1)[0]).parent.toString() + '/.command.sh').text.contains('--mbfile') }, + { + assert snapshot( + process.out.grm_files, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - plink1 - stub") { + options "-stub" + + when { + process { + """ + file('gcta_grm.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'gcta_grm_bed' ], + file('gcta_grm.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/gcta/makegrm/tests/main.nf.test.snap b/modules/nf-core/gcta/makegrm/tests/main.nf.test.snap new file mode 100644 index 000000000000..f8fbe133d5a2 --- /dev/null +++ b/modules/nf-core/gcta/makegrm/tests/main.nf.test.snap @@ -0,0 +1,111 @@ +{ + "homo_sapiens popgen - plink2": { + "content": [ + [ + [ + { + "id": "gcta_grm" + }, + [ + "gcta_grm.grm.N.bin:md5,acaa43bbbf2253d392537a178ecf09a4", + "gcta_grm.grm.bin:md5,45f8dff14bda17d50009a21050572228", + "gcta_grm.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9" + ] + ] + ], + { + "versions_gcta": [ + [ + "GCTA_MAKEGRM", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-15T21:08:43.209734458" + }, + "homo_sapiens popgen - plink1": { + "content": [ + [ + [ + { + "id": "gcta_grm_bed" + }, + [ + "gcta_grm_bed.grm.N.bin:md5,acaa43bbbf2253d392537a178ecf09a4", + 
"gcta_grm_bed.grm.bin:md5,45f8dff14bda17d50009a21050572228", + "gcta_grm_bed.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9" + ] + ] + ], + { + "versions_gcta": [ + [ + "GCTA_MAKEGRM", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-15T21:09:34.058651287" + }, + "homo_sapiens popgen - plink1 - stub": { + "content": [ + { + "0": [ + [ + { + "id": "gcta_grm_bed" + }, + [ + "gcta_grm_bed.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "gcta_grm_bed.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "gcta_grm_bed.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + [ + "GCTA_MAKEGRM", + "gcta", + "1.94.1" + ] + ], + "grm_files": [ + [ + { + "id": "gcta_grm_bed" + }, + [ + "gcta_grm_bed.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "gcta_grm_bed.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "gcta_grm_bed.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions_gcta": [ + [ + "GCTA_MAKEGRM", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-15T21:10:21.024687128" + } +} \ No newline at end of file diff --git a/modules/nf-core/gcta/makegrm/tests/nextflow.config b/modules/nf-core/gcta/makegrm/tests/nextflow.config new file mode 100644 index 000000000000..de31e0218829 --- /dev/null +++ b/modules/nf-core/gcta/makegrm/tests/nextflow.config @@ -0,0 +1,3 @@ +params { + modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/" +} diff --git a/modules/nf-core/gcta/reml/main.nf b/modules/nf-core/gcta/reml/main.nf index 827faf927b68..02a7769453ec 100644 --- a/modules/nf-core/gcta/reml/main.nf +++ b/modules/nf-core/gcta/reml/main.nf @@ -3,28 +3,28 @@ process GCTA_REML { label 'process_medium' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && 
!task.ext.singularity_pull_docker_container ? - 'docker://community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' : - 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46b0d05f0daa47561d87d2a9cac5e51edc2c78e26f1bbab439c688386241a274/data' + : 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}" input: - tuple val(meta), path(phenotypes_file) - tuple val(meta2), path(grm_id), path(grm_bin), path(grm_n_bin) + tuple val(meta), path(phenotypes_file), val(mpheno), val(prevalence) + tuple val(meta2), path(grm_files) tuple val(meta3), path(quant_covariates_file) tuple val(meta4), path(cat_covariates_file) - val(mpheno) output: tuple val(meta), path("*.hsq"), emit: reml_results - tuple val("${task.process}"), val("gcta"), eval("gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'"), emit: versions_gcta, topic: versions + tuple val("${task.process}"), val("gcta"), eval("gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'"), emit: versions_gcta, topic: versions when: task.ext.when == null || task.ext.when script: def prefix = task.ext.prefix ?: "${meta.id}" - def mpheno_value = (mpheno == null || (mpheno instanceof Collection && mpheno.isEmpty())) ? 1 : mpheno + def mpheno_value = mpheno ?: 1 def mpheno_param = "--mpheno ${mpheno_value}" + def prevalence_param = prevalence ? "--prevalence ${prevalence}" : '' def qcovar_param = quant_covariates_file ? "--qcovar ${quant_covariates_file}" : '' def covar_param = cat_covariates_file ? 
"--covar ${cat_covariates_file}" : '' def extra_args = task.ext.args ?: '' @@ -37,6 +37,7 @@ process GCTA_REML { --grm ${meta2.id} \\ --pheno ${phenotypes_file} \\ ${mpheno_param} \\ + ${prevalence_param} \\ ${qcovar_param} \\ ${covar_param} \\ --out "${prefix}" \\ diff --git a/modules/nf-core/gcta/reml/meta.yml b/modules/nf-core/gcta/reml/meta.yml index 8f6b97f5e7a2..2d3f0e0e980f 100644 --- a/modules/nf-core/gcta/reml/meta.yml +++ b/modules/nf-core/gcta/reml/meta.yml @@ -3,7 +3,11 @@ name: "gcta_reml" description: Run univariate REML heritability estimation with a dense GRM keywords: - gcta + - genome-wide complex trait analysis - reml + - restricted maximum likelihood + - grm + - genetic relationship matrix - genetics tools: - "gcta": @@ -11,6 +15,8 @@ tools: homepage: "https://yanglab.westlake.edu.cn/software/gcta/" documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf" tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/" + licence: ["GPL-3.0-only"] + identifier: "biotools:gcta" input: - - meta: type: map @@ -23,25 +29,27 @@ input: pattern: "*.{phe,pheno,txt,tsv}" ontologies: - edam: "http://edamontology.org/format_3475" + - mpheno: + type: integer + description: | + Phenotype column selector passed to `--mpheno`. + Pass `[]` or `null` to use the default first phenotype column + (`--mpheno 1`). + - prevalence: + type: float + description: | + Population prevalence passed to `--prevalence` for case-control traits. + Pass `[]` or `null` for quantitative traits. - - meta2: type: map description: | Groovy map containing dense GRM metadata e.g. 
`[ id:'plink_simulated' ]` - - grm_id: - type: file - description: Dense GRM sample identifier file - pattern: "*.grm.id" - ontologies: [] - - grm_bin: - type: file - description: Dense GRM binary matrix file - pattern: "*.grm.bin" - ontologies: [] - - grm_n_bin: + - grm_files: type: file - description: Dense GRM sample-count matrix file - pattern: "*.grm.N.bin" + description: Dense GRM bundle containing sample identifier, binary matrix, + and sample-count matrix files + pattern: "*.grm.{id,bin,N.bin}" ontologies: [] - - meta3: type: map @@ -65,11 +73,6 @@ input: pattern: "*.{covar,cov,txt,tsv}" ontologies: - edam: "http://edamontology.org/format_3475" - - mpheno: - type: integer - description: | - Phenotype column selector passed to `--mpheno`. - Pass `1` explicitly for the default first phenotype column. output: reml_results: - - meta: @@ -90,7 +93,7 @@ output: - "gcta": type: string description: The tool name - - "gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//'": + - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'": type: eval description: The command used to retrieve the GCTA version topics: @@ -101,10 +104,10 @@ topics: - gcta: type: string description: The tool name - - gcta --version 2>&1 | grep 'version v' | tr -s ' ' | cut -d' ' -f3 | sed 's/^v//': + - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'": type: eval description: The command used to retrieve the GCTA version authors: - - "@andongni" + - "@lyh970817" maintainers: - - "@andongni" + - "@lyh970817" diff --git a/modules/nf-core/gcta/reml/tests/main.nf.test b/modules/nf-core/gcta/reml/tests/main.nf.test index fa8e031c366d..ce2730ab94ed 100644 --- a/modules/nf-core/gcta/reml/tests/main.nf.test +++ b/modules/nf-core/gcta/reml/tests/main.nf.test @@ -8,7 +8,7 @@ nextflow_process { tag "modules_nfcore" tag "gcta" tag "gcta/reml" - tag "gcta/makegrmpart" + tag "gcta/makegrm" tag "gawk" setup { @@ -40,6 +40,20 @@ nextflow_process { } } + run("GAWK", alias: 
"GAWK_BINARY_PHENOTYPE") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'BinaryTrait' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_binary_phenoname.phe', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3 + 1 }').collectFile(name:'binary_phenotypes.awk') + input[2] = false + """ + } + } + run("GAWK", alias: "GAWK_QUANTITATIVE_COVARIATES") { script "../../../gawk/main.nf" process { @@ -68,14 +82,14 @@ nextflow_process { } } - run("GCTA_MAKEGRMPART", alias: "GCTA_MAKEGRMPART_DENSE") { - script "../../makegrmpart/main.nf" + run("GCTA_MAKEGRM", alias: "GCTA_MAKEGRM_DENSE") { + script "../../makegrm/main.nf" process { """ file('plink_simulated.mbfile').text = 'plink_simulated\\n' input[0] = [ - [ id:'plink_simulated_dense', part_gcta_job:1, nparts_gcta:1 ], + [ id:'plink_simulated_dense' ], file('plink_simulated.mbfile'), [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) @@ -87,7 +101,6 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) ] ] - input[1] = [[ id:'all_variants' ], []] """ } } @@ -98,16 +111,10 @@ nextflow_process { when { process { """ - dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> - def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job - [[ id:prefix ], grm_id, grm_bin, grm_n_bin] - } - - input[0] = GAWK_MULTI_PHENOTYPES.out.output.map { meta, pheno -> [[ id:'QuantitativeTraitMpheno2' ], pheno] } - input[1] = dense_grm + input[0] = GAWK_MULTI_PHENOTYPES.out.output.map { meta, pheno -> [[ id:'QuantitativeTraitMpheno2' ], pheno, 2, []] } + input[1] = GCTA_MAKEGRM_DENSE.out.grm_files input[2] = GAWK_QUANTITATIVE_COVARIATES.out.output input[3] = GAWK_CATEGORICAL_COVARIATES.out.output - input[4] = 2 """ } } @@ -132,16 +139,10 @@ 
nextflow_process { when { process { """ - dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> - def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job - [[ id:prefix ], grm_id, grm_bin, grm_n_bin] - } - - input[0] = GAWK_QUANTITATIVE_PHENOTYPE.out.output - input[1] = dense_grm + input[0] = GAWK_QUANTITATIVE_PHENOTYPE.out.output.map { meta, pheno -> [meta, pheno, 1, []] } + input[1] = GCTA_MAKEGRM_DENSE.out.grm_files input[2] = GAWK_QUANTITATIVE_COVARIATES.out.output input[3] = GAWK_CATEGORICAL_COVARIATES.out.output - input[4] = 1 """ } } @@ -166,16 +167,10 @@ nextflow_process { when { process { """ - dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> - def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job - [[ id:prefix ], grm_id, grm_bin, grm_n_bin] - } - - input[0] = GAWK_QUANTITATIVE_PHENOTYPE.out.output - input[1] = dense_grm + input[0] = GAWK_QUANTITATIVE_PHENOTYPE.out.output.map { meta, pheno -> [meta, pheno, 1, null] } + input[1] = GCTA_MAKEGRM_DENSE.out.grm_files input[2] = [[ id:'covariates_quant' ], []] input[3] = [[ id:'covariates_cat' ], []] - input[4] = 1 """ } } @@ -195,21 +190,43 @@ nextflow_process { } } - test("homo_sapiens popgen - dense GRM mpheno defaults to first phenotype when empty") { + test("homo_sapiens popgen - binary phenotype with dense GRM and prevalence") { config "./nextflow.config" when { process { """ - dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> - def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job - [[ id:prefix ], grm_id, grm_bin, grm_n_bin] + input[0] = GAWK_BINARY_PHENOTYPE.out.output.map { meta, pheno -> [[ id:'BinaryTraitPrevalence' ], pheno, 1, 0.1] } + input[1] = GCTA_MAKEGRM_DENSE.out.grm_files + input[2] = [[ id:'covariates_quant' ], []] + input[3] = [[ id:'covariates_cat' ], []] + """ + } + } + + then { + assertAll( + { 
assert process.success }, + { assert process.out.reml_results.size() == 1 }, + { assert process.out.reml_results.get(0).get(0).id == "BinaryTraitPrevalence" }, + { + assert snapshot( + process.out.reml_results, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + } - input[0] = GAWK_MULTI_PHENOTYPES.out.output.map { meta, pheno -> [[ id:'QuantitativeTraitMphenoDefault' ], pheno] } - input[1] = dense_grm + test("homo_sapiens popgen - dense GRM mpheno defaults to first phenotype when empty") { + config "./nextflow.config" + when { + process { + """ + input[0] = GAWK_MULTI_PHENOTYPES.out.output.map { meta, pheno -> [[ id:'QuantitativeTraitMphenoDefault' ], pheno, [], []] } + input[1] = GCTA_MAKEGRM_DENSE.out.grm_files input[2] = GAWK_QUANTITATIVE_COVARIATES.out.output input[3] = GAWK_CATEGORICAL_COVARIATES.out.output - input[4] = [] """ } } @@ -234,16 +251,10 @@ nextflow_process { when { process { """ - dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> - def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job - [[ id:prefix ], grm_id, grm_bin, grm_n_bin] - } - - input[0] = GAWK_MULTI_PHENOTYPES.out.output.map { meta, pheno -> [[ id:'QuantitativeTraitInvalidMpheno' ], pheno] } - input[1] = dense_grm + input[0] = GAWK_MULTI_PHENOTYPES.out.output.map { meta, pheno -> [[ id:'QuantitativeTraitInvalidMpheno' ], pheno, 3, []] } + input[1] = GCTA_MAKEGRM_DENSE.out.grm_files input[2] = GAWK_QUANTITATIVE_COVARIATES.out.output input[3] = GAWK_CATEGORICAL_COVARIATES.out.output - input[4] = 3 """ } } @@ -263,16 +274,10 @@ nextflow_process { when { process { """ - dense_grm = GCTA_MAKEGRMPART_DENSE.out.grm_files.map { meta, grm_id, grm_bin, grm_n_bin -> - def prefix = meta.id + '.part_' + meta.nparts_gcta + '_' + meta.part_gcta_job - [[ id:prefix ], grm_id, grm_bin, grm_n_bin] - } - - input[0] = GAWK_QUANTITATIVE_PHENOTYPE.out.output - input[1] = dense_grm + input[0] = 
GAWK_QUANTITATIVE_PHENOTYPE.out.output.map { meta, pheno -> [meta, pheno, 1, []] } + input[1] = GCTA_MAKEGRM_DENSE.out.grm_files input[2] = [[ id:'covariates_quant' ], []] input[3] = [[ id:'covariates_cat' ], []] - input[4] = 1 """ } } diff --git a/modules/nf-core/gcta/reml/tests/main.nf.test.snap b/modules/nf-core/gcta/reml/tests/main.nf.test.snap index cacd5b4c1355..78669adf1c8e 100644 --- a/modules/nf-core/gcta/reml/tests/main.nf.test.snap +++ b/modules/nf-core/gcta/reml/tests/main.nf.test.snap @@ -128,5 +128,31 @@ "nextflow": "25.10.4" }, "timestamp": "2026-03-21T18:45:19.433138352" + }, + "homo_sapiens popgen - binary phenotype with dense GRM and prevalence": { + "content": [ + [ + [ + { + "id": "BinaryTraitPrevalence" + }, + "BinaryTraitPrevalence.hsq:md5,86672f6a0b3c49b2347d402cb03b8606" + ] + ], + { + "versions_gcta": [ + [ + "GCTA_REML", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-13T16:08:12.836619238" } } \ No newline at end of file