diff --git a/modules/nf-core/gcta/fastgwa/environment.yml b/modules/nf-core/gcta/fastgwa/environment.yml new file mode 100644 index 00000000000..3e22ea7b9f2 --- /dev/null +++ b/modules/nf-core/gcta/fastgwa/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gcta=1.94.1 diff --git a/modules/nf-core/gcta/fastgwa/main.nf b/modules/nf-core/gcta/fastgwa/main.nf new file mode 100644 index 00000000000..c0eda47b73e --- /dev/null +++ b/modules/nf-core/gcta/fastgwa/main.nf @@ -0,0 +1,60 @@ +process GCTA_FASTGWA { // Runs one `gcta` fastGWA association for a genotype set and one phenotype selection + tag "${meta.id}:${mpheno}" + label 'process_medium' + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46b0d05f0daa47561d87d2a9cac5e51edc2c78e26f1bbab439c688386241a274/data' + : 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}" + + input: + tuple val(meta), path(bed_pgen), path(bim_pvar), path(fam_psam) + tuple val(meta2), path(sparse_grm_files) // meta2.id is passed to --grm-sparse, i.e. it is used as the sparse GRM file prefix + tuple val(meta3), path(phenotype_file), val(mpheno), val(is_binary) // mpheno and is_binary are echoed back in both output tuples + tuple val(meta4), path(quant_covariates_file) + tuple val(meta5), path(cat_covariates_file) + val mlm_exact // only consulted when is_binary is false + + output: + tuple val(meta), path("*.fastGWA"), val(mpheno), val(is_binary), emit: results + tuple val(meta), path("*.log"), val(mpheno), val(is_binary), emit: log + tuple val("${task.process}"), val("gcta"), eval("gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'"), emit: versions_gcta, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def genotype_suffix = bed_pgen.name.tokenize('.').last() // text after the last '.' of the genotype file name + def genotype_flag = genotype_suffix == 'pgen' ? '--pfile' : '--bfile' // 'pgen' selects --pfile, anything else --bfile + def genotype_prefix = bed_pgen.baseName // prefix handed to the genotype flag above + def grm_arg = sparse_grm_files ? 
"--grm-sparse ${meta2.id}" : '' // empty when no sparse GRM files are supplied + def mode_arg = is_binary ? '--fastGWA-lr' : (mlm_exact ? '--fastGWA-mlm-exact' : '--fastGWA-mlm') // is_binary takes precedence over mlm_exact + def qcovar_arg = quant_covariates_file ? "--qcovar ${quant_covariates_file}" : '' + def covar_arg = cat_covariates_file ? "--covar ${cat_covariates_file}" : '' + def mpheno_arg = mpheno ? "--mpheno ${mpheno}" : '' + def prefix = task.ext.prefix ?: "${meta.id}" + def out = mpheno ? "${prefix}_${mpheno}" : "${prefix}" // "<prefix>_<mpheno>" when a phenotype column is selected, else "<prefix>" + def extra_args = task.ext.args ?: '' // free-form options appended after --out + + """ + set -euo pipefail + + gcta \\ + ${genotype_flag} ${genotype_prefix} \\ + ${grm_arg} \\ + ${mode_arg} \\ + --pheno ${phenotype_file} \\ + ${qcovar_arg} \\ + ${covar_arg} \\ + ${mpheno_arg} \\ + --thread-num ${task.cpus} \\ + --out ${out} ${extra_args} + """ + + stub: // creates empty placeholders with the same basenames as the real outputs + def prefix = task.ext.prefix ?: "${meta.id}" + def out = mpheno ? "${prefix}_${mpheno}" : "${prefix}" + """ + touch ${out}.fastGWA + touch ${out}.log + """ +} diff --git a/modules/nf-core/gcta/fastgwa/meta.yml b/modules/nf-core/gcta/fastgwa/meta.yml new file mode 100644 index 00000000000..3d63593878e --- /dev/null +++ b/modules/nf-core/gcta/fastgwa/meta.yml @@ -0,0 +1,161 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "gcta_fastgwa" +description: Run GCTA fastGWA association modes (`--fastGWA-mlm`, `--fastGWA-mlm-exact`, and `--fastGWA-lr`) with PLINK genotype inputs +keywords: + - gcta + - genome-wide complex trait analysis + - fastgwa + - fast genome-wide association + - gwas + - genome-wide association study + - genetics +tools: + - "gcta": + description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data." 
+ homepage: "https://yanglab.westlake.edu.cn/software/gcta/" + documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf" + tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/" + licence: ["GPL-3.0-only"] + identifier: "biotools:gcta" +input: + - - meta: + type: map + description: | + Groovy map containing PLINK genotype metadata + e.g. `[ id:'plink_simulated' ]` + - bed_pgen: + type: file + description: PLINK primary genotype file, either `.bed` or `.pgen` + pattern: "*.{bed,pgen}" + ontologies: + - edam: "http://edamontology.org/format_3003" + - bim_pvar: + type: file + description: PLINK sidecar file, either `.bim` or `.pvar` + pattern: "*.{bim,pvar}" + ontologies: [] + - fam_psam: + type: file + description: PLINK sidecar file, either `.fam` or `.psam` + pattern: "*.{fam,psam}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy map containing sparse GRM metadata + e.g. `[ id:'plink_simulated_sp' ]` + Used when sparse GRM files are supplied; `id` is passed to `--grm-sparse` and must match the basename of those files + - sparse_grm_files: + type: file + description: Sparse GRM sidecar files, pass `[]` when absent + pattern: "*.grm.{id,sp}" + ontologies: [] + - - meta3: + type: map + description: | + Groovy map containing phenotype metadata + Keep only stable phenotype metadata in this map + e.g. `[ id:'plink_simulated' ]` + - phenotype_file: + type: file + description: Phenotype file + pattern: "*.{phe,pheno,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" + - mpheno: + type: integer + description: | + Phenotype column selector passed to `--mpheno`. + Pass `[]` when absent. + - is_binary: + type: boolean + description: Whether the phenotype is a binary trait; when true the module runs `--fastGWA-lr`, which GCTA documents as linear regression rather than a logistic model + - - meta4: + type: map + description: | + Groovy map containing quantitative covariate metadata + e.g. 
`[ id:'covariates_quant' ]` + - quant_covariates_file: + type: file + description: Quantitative covariates file, pass `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" + - - meta5: + type: map + description: | + Groovy map containing categorical covariate metadata + e.g. `[ id:'covariates_cat' ]` + - cat_covariates_file: + type: file + description: Categorical covariates file, pass `[]` when absent + pattern: "*.{covar,cov,txt,tsv}" + ontologies: + - edam: "http://edamontology.org/format_3475" + - mlm_exact: + type: boolean + description: | + Apply `--fastGWA-mlm-exact` for non-binary phenotypes. + Ignored when `is_binary` is true because binary phenotypes use `--fastGWA-lr`. +output: + results: + - - meta: + type: map + description: | + Groovy map containing PLINK genotype metadata + e.g. `[ id:'plink_simulated' ]` + - "*.fastGWA": + type: file + description: FastGWA association results + pattern: "*.fastGWA" + ontologies: + - edam: "http://edamontology.org/format_2330" + - mpheno: + type: integer + description: Phenotype column selector used for the emitted result + - is_binary: + type: boolean + description: Whether `--fastGWA-lr` was used for the emitted result + log: + - - meta: + type: map + description: | + Groovy map containing PLINK genotype metadata + e.g. 
`[ id:'plink_simulated' ]` + - "*.log": + type: file + description: GCTA fastGWA log file + pattern: "*.log" + ontologies: + - edam: "http://edamontology.org/format_2330" + - mpheno: + type: integer + description: Phenotype column selector used for the emitted log + - is_binary: + type: boolean + description: Whether `--fastGWA-lr` was used for the emitted log + versions_gcta: + - - "${task.process}": + type: string + description: The process the version was collected from + - "gcta": + type: string + description: The tool name + - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'": + type: eval + description: The command used to retrieve the GCTA version +topics: + versions: + - - ${task.process}: + type: string + description: The process the version was collected from + - gcta: + type: string + description: The tool name + - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'": + type: eval + description: The command used to retrieve the GCTA version +authors: + - "@lyh970817" +maintainers: + - "@lyh970817" diff --git a/modules/nf-core/gcta/fastgwa/tests/main.nf.test b/modules/nf-core/gcta/fastgwa/tests/main.nf.test new file mode 100644 index 00000000000..58aa357ac96 --- /dev/null +++ b/modules/nf-core/gcta/fastgwa/tests/main.nf.test @@ -0,0 +1,412 @@ +nextflow_process { + + name "Test Process GCTA_FASTGWA" + script "../main.nf" + process "GCTA_FASTGWA" + + tag "modules" + tag "modules_nfcore" + tag "gcta" + tag "gcta/fastgwa" + tag "gcta/makegrm" + tag "gcta/makebksparse" + tag "gawk" + config "./nextflow.config" + + setup { + run("GAWK", alias: "GAWK_QUANTITATIVE_PHENO") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'QuantitativeTrait' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3 }').collectFile(name:'quantitative_phenotype.awk') + input[2] = false 
+ """ + } + } + + run("GAWK", alias: "GAWK_BINARY_PHENO") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'BinaryTrait' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_binary_phenoname.phe', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3 }').collectFile(name:'binary_phenotype.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_MULTI_PHENO") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'QuantitativeTraits' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_quantitative_phenoname.phe', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3, (\$3 * 1.7) + ((NR % 5) / 10.0) }').collectFile(name:'multi_phenotype.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_QUANTITATIVE_COVARIATES") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'covariates_quant' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$4, \$5, \$6 }').collectFile(name:'quantitative_covariates.awk') + input[2] = false + """ + } + } + + run("GAWK", alias: "GAWK_CATEGORICAL_COVARIATES") { + script "../../../gawk/main.nf" + process { + """ + input[0] = [ + [ id:'covariates_cat' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated_covariates.txt', checkIfExists: true) + ] + input[1] = Channel.of('FNR == 1 { next } { print \$1, \$2, \$3 }').collectFile(name:'categorical_covariates.awk') + input[2] = false + """ + } + } + + run("GCTA_MAKEGRM", alias: "GCTA_MAKEGRM_DENSE") { + script "../../makegrm/main.nf" + process { + """ + file('plink_simulated.mbfile').text = 'plink_simulated\\n' + + input[0] = [ + [ id:'plink_simulated_dense' ], + 
file('plink_simulated.mbfile'), + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true) + ] + ] + """ + } + } + + run("GCTA_MAKEBKSPARSE", alias: "GCTA_MAKEBKSPARSE_DENSE") { + script "../../makebksparse/main.nf" + process { + """ + input[0] = GCTA_MAKEGRM_DENSE.out.grm_files + input[1] = Channel.value(0.05) + """ + } + } + } + + test("homo_sapiens popgen - plink1 with sparse GRM and quantitative phenotype") { + when { + process { + """ + sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_files -> + [[ id:meta.id + '_sp' ], sparse_grm_files] + } + + quantitative_pheno = GAWK_QUANTITATIVE_PHENO.out.output.map { meta, phenotype_file -> + [[ id:meta.id ], phenotype_file, 1, false] + } + + input[0] = [ + [ id:"plink_simulated" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bed", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bim", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.fam", checkIfExists: true) + ] + input[1] = sparse_grm + input[2] = quantitative_pheno + input[3] = GAWK_QUANTITATIVE_COVARIATES.out.output + input[4] = GAWK_CATEGORICAL_COVARIATES.out.output + input[5] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.results.size() == 1 }, + { assert process.out.log.size() == 1 }, + { assert process.out.results.get(0).get(0).id == "plink_simulated" }, + { assert process.out.results.get(0).get(2) == 1 }, + { assert process.out.results.get(0).get(3) == false }, + { assert process.out.log.get(0).get(2) == 1 }, 
+ { assert process.out.log.get(0).get(3) == false }, + { assert path(process.out.results.get(0).get(1)).fileName.toString() == "plink_simulated_1.fastGWA" }, + { assert path(process.out.log.get(0).get(1)).fileName.toString() == "plink_simulated_1.log" }, + { assert path(process.out.results.get(0).get(1)).readLines().get(0).contains("BETA") }, + { assert file(path(process.out.results.get(0).get(1)).parent.toString() + "/.command.sh").text.contains("--fastGWA-mlm-exact") }, + { assert file(path(process.out.results.get(0).get(1)).parent.toString() + "/.command.sh").text.contains("--grm-sparse plink_simulated_dense_sp") }, + { assert file(path(process.out.results.get(0).get(1)).parent.toString() + "/.command.sh").text.contains("--mpheno 1") }, + { + assert snapshot( + process.out.results, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - plink1 with sparse GRM and quantitative phenotype mpheno selection") { + when { + process { + """ + sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_files -> + [[ id:meta.id + '_sp' ], sparse_grm_files] + } + + multi_pheno = GAWK_MULTI_PHENO.out.output.map { meta, phenotype_file -> + [[ id:'QuantitativeTraitMpheno2' ], phenotype_file, 2, false] + } + + input[0] = [ + [ id:"plink_simulated" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bed", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bim", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.fam", checkIfExists: true) + ] + input[1] = sparse_grm + input[2] = multi_pheno + input[3] = GAWK_QUANTITATIVE_COVARIATES.out.output + input[4] = GAWK_CATEGORICAL_COVARIATES.out.output + input[5] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.results.size() == 1 }, + { assert 
process.out.log.size() == 1 }, + { assert process.out.results.get(0).get(0).id == "plink_simulated" }, + { assert process.out.results.get(0).get(2) == 2 }, + { assert process.out.results.get(0).get(3) == false }, + { assert process.out.log.get(0).get(2) == 2 }, + { assert process.out.log.get(0).get(3) == false }, + { assert path(process.out.results.get(0).get(1)).fileName.toString() == "plink_simulated_2.fastGWA" }, + { assert path(process.out.log.get(0).get(1)).fileName.toString() == "plink_simulated_2.log" }, + { assert path(process.out.results.get(0).get(1)).readLines().get(0).contains("BETA") }, + { assert file(path(process.out.results.get(0).get(1)).parent.toString() + "/.command.sh").text.contains("--mpheno 2") }, + { + assert snapshot( + process.out.results, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - plink1 with sparse GRM and binary phenotype") { + when { + process { + """ + binary_pheno = GAWK_BINARY_PHENO.out.output.map { meta, phenotype_file -> + [[ id:meta.id ], phenotype_file, 1, true] + } + + input[0] = [ + [ id:"plink_simulated" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bed", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bim", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.fam", checkIfExists: true) + ] + input[1] = [[ id:'empty_sparse' ], []] + input[2] = binary_pheno + input[3] = GAWK_QUANTITATIVE_COVARIATES.out.output + input[4] = GAWK_CATEGORICAL_COVARIATES.out.output + input[5] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.results.size() == 1 }, + { assert process.out.log.size() == 1 }, + { assert process.out.results.get(0).get(0).id == "plink_simulated" }, + { assert process.out.results.get(0).get(2) == 1 }, + { assert 
process.out.results.get(0).get(3) == true }, + { assert process.out.log.get(0).get(2) == 1 }, + { assert process.out.log.get(0).get(3) == true }, + { assert path(process.out.results.get(0).get(1)).fileName.toString() == "plink_simulated_1.fastGWA" }, + { assert path(process.out.log.get(0).get(1)).fileName.toString() == "plink_simulated_1.log" }, + { assert file(path(process.out.results.get(0).get(1)).parent.toString() + "/.command.sh").text.contains("--fastGWA-lr") }, + { assert file(path(process.out.results.get(0).get(1)).parent.toString() + "/.command.sh").text.contains("--mpheno 1") }, + { assert !file(path(process.out.results.get(0).get(1)).parent.toString() + "/.command.sh").text.contains("--grm-sparse") }, + { + assert snapshot( + process.out.results, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - plink2 with sparse GRM and quantitative phenotype") { + + when { + process { + """ + sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_files -> + [[ id:meta.id + '_sp' ], sparse_grm_files] + } + + quantitative_pheno = GAWK_QUANTITATIVE_PHENO.out.output.map { meta, phenotype_file -> + [[ id:meta.id ], phenotype_file, 1, false] + } + + input[0] = [ + [ id:"plink_simulated" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.pgen", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.pvar", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.psam", checkIfExists: true) + ] + input[1] = sparse_grm + input[2] = quantitative_pheno + input[3] = [[ id:'covariates_quant' ], []] + input[4] = [[ id:'covariates_cat' ], []] + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.results.size() == 1 }, + { assert process.out.log.size() == 1 }, + { assert 
process.out.results.get(0).get(0).id == "plink_simulated" }, + { assert process.out.results.get(0).get(2) == 1 }, + { assert process.out.results.get(0).get(3) == false }, + { assert process.out.log.get(0).get(2) == 1 }, + { assert process.out.log.get(0).get(3) == false }, + { assert path(process.out.results.get(0).get(1)).fileName.toString() == "plink_simulated_1.fastGWA" }, + { assert path(process.out.log.get(0).get(1)).fileName.toString() == "plink_simulated_1.log" }, + { assert path(process.out.results.get(0).get(1)).readLines().get(0).contains("BETA") }, + { assert file(path(process.out.results.get(0).get(1)).parent.toString() + "/.command.sh").text.contains("--grm-sparse plink_simulated_dense_sp") }, + { + assert snapshot( + process.out.results, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } + + test("homo_sapiens popgen - non-binary fails when sparse GRM prefix mismatches files") { + when { + process { + """ + sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_files -> + [[ id:meta.id + '_sp' ], sparse_grm_files] + } + + quantitative_pheno = GAWK_QUANTITATIVE_PHENO.out.output.map { meta, phenotype_file -> + [[ id:meta.id ], phenotype_file, 1, false] + } + + sparse_grm_bad_prefix = sparse_grm.map { meta, sparse_grm_files -> + [[ id:'incorrect_sparse_prefix' ], sparse_grm_files] + } + + input[0] = [ + [ id:"plink_simulated" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bed", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.bim", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.fam", checkIfExists: true) + ] + input[1] = sparse_grm_bad_prefix + input[2] = quantitative_pheno + input[3] = [[ id:'covariates_quant' ], []] + input[4] = [[ id:'covariates_cat' ], []] + input[5] = false + """ + } + } + + then { + assertAll( + { 
assert !process.success }, + { assert process.exitStatus != 0 }, + { assert process.stdout.toString().contains("incorrect_sparse_prefix") }, + { assert process.stdout.toString().contains("incorrect_sparse_prefix.grm.id") } + ) + } + } + + test("homo_sapiens popgen - plink2 with sparse GRM - stub") { + options "-stub" + + when { + process { + """ + sparse_grm = GCTA_MAKEBKSPARSE_DENSE.out.sparse_grm_files.map { meta, sparse_grm_files -> + [[ id:meta.id + '_sp' ], sparse_grm_files] + } + + quantitative_pheno = GAWK_QUANTITATIVE_PHENO.out.output.map { meta, phenotype_file -> + [[ id:meta.id ], phenotype_file, 1, false] + } + + input[0] = [ + [ id:"plink_simulated" ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.pgen", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.pvar", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/plink_simulated.psam", checkIfExists: true) + ] + input[1] = sparse_grm + input[2] = quantitative_pheno + input[3] = [[ id:'covariates_quant' ], []] + input[4] = [[ id:'covariates_cat' ], []] + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.results.size() == 1 }, + { assert process.out.log.size() == 1 }, + { assert process.out.results.get(0).get(0).id == "plink_simulated" }, + { assert process.out.results.get(0).get(2) == 1 }, + { assert process.out.results.get(0).get(3) == false }, + { + assert snapshot( + process.out.results, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + ) + } + } +} diff --git a/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap b/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap new file mode 100644 index 00000000000..9b41d8bfd15 --- /dev/null +++ b/modules/nf-core/gcta/fastgwa/tests/main.nf.test.snap @@ -0,0 +1,142 @@ +{ + "homo_sapiens popgen - plink2 with sparse GRM - stub": { 
+ "content": [ + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_1.fastGWA:md5,d41d8cd98f00b204e9800998ecf8427e", + 1, + false + ] + ], + { + "versions_gcta": [ + [ + "GCTA_FASTGWA", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-15T21:51:24.403157293" + }, + "homo_sapiens popgen - plink1 with sparse GRM and quantitative phenotype": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_1.fastGWA:md5,d9190e07273a3de2a15a6e7053aed487", + 1, + false + ] + ], + { + "versions_gcta": [ + [ + "GCTA_FASTGWA", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-15T21:41:12.959660072" + }, + "homo_sapiens popgen - plink1 with sparse GRM and quantitative phenotype mpheno selection": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_2.fastGWA:md5,d10da1dac8dccf55a9000c4813d4f625", + 2, + false + ] + ], + { + "versions_gcta": [ + [ + "GCTA_FASTGWA", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-15T21:43:43.761213489" + }, + "homo_sapiens popgen - plink2 with sparse GRM and quantitative phenotype": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_1.fastGWA:md5,6742a23a7e4280161c104027b1cac012", + 1, + false + ] + ], + { + "versions_gcta": [ + [ + "GCTA_FASTGWA", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-15T21:48:22.048462109" + }, + "homo_sapiens popgen - plink1 with sparse GRM and binary phenotype": { + "content": [ + [ + [ + { + "id": "plink_simulated" + }, + "plink_simulated_1.fastGWA:md5,723602dcb94b8a08b3652f1491dcd2ee", + 1, + true + ] + ], + { + "versions_gcta": [ + [ + "GCTA_FASTGWA", + "gcta", + "1.94.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": 
"25.10.4" + }, + "timestamp": "2026-05-15T21:46:07.594442521" + } +} \ No newline at end of file diff --git a/modules/nf-core/gcta/fastgwa/tests/nextflow.config b/modules/nf-core/gcta/fastgwa/tests/nextflow.config new file mode 100644 index 00000000000..de31e021882 --- /dev/null +++ b/modules/nf-core/gcta/fastgwa/tests/nextflow.config @@ -0,0 +1,3 @@ +params { + modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/" +} diff --git a/modules/nf-core/gcta/makebksparse/environment.yml b/modules/nf-core/gcta/makebksparse/environment.yml new file mode 100644 index 00000000000..3e22ea7b9f2 --- /dev/null +++ b/modules/nf-core/gcta/makebksparse/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gcta=1.94.1 diff --git a/modules/nf-core/gcta/makebksparse/main.nf b/modules/nf-core/gcta/makebksparse/main.nf new file mode 100644 index 00000000000..cd62a7dbe74 --- /dev/null +++ b/modules/nf-core/gcta/makebksparse/main.nf @@ -0,0 +1,39 @@ +process GCTA_MAKEBKSPARSE { + tag "${meta.id}" + label 'process_medium' + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46b0d05f0daa47561d87d2a9cac5e51edc2c78e26f1bbab439c688386241a274/data' + : 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}" + + input: + tuple val(meta), path(grm_files) // meta.id is passed to --grm, so it must equal the basename of the staged dense GRM files + val cutoff // value handed to --make-bK-sparse + + output: + tuple val(meta), path("*_sp.grm.*"), emit: sparse_grm_files // files written under the "<prefix>_sp" basename + tuple val("${task.process}"), val("gcta"), eval("gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'"), emit: versions_gcta, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" // only names the outputs; --grm below always uses meta.id + def extra_args = task.ext.args ?: '' // free-form options appended as the last argument + + """ + gcta \\ + --grm ${meta.id} \\ + --make-bK-sparse ${cutoff} \\ + --out ${prefix}_sp \\ + --thread-num ${task.cpus} \\ + ${extra_args} + """ + + stub: // creates empty placeholders with the same names as the real outputs + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_sp.grm.id + touch ${prefix}_sp.grm.sp + """ +} diff --git a/modules/nf-core/gcta/makebksparse/meta.yml b/modules/nf-core/gcta/makebksparse/meta.yml new file mode 100644 index 00000000000..e386fc180e5 --- /dev/null +++ b/modules/nf-core/gcta/makebksparse/meta.yml @@ -0,0 +1,76 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "gcta_makebksparse" +description: Create a sparse GRM from a dense GRM for downstream fastGWA analyses +keywords: + - gcta + - genome-wide complex trait analysis + - grm + - genetic relationship matrix + - sparse + - genetics +tools: + - "gcta": + description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data." 
+ homepage: "https://yanglab.westlake.edu.cn/software/gcta/" + documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf" + tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/" + licence: ["GPL-3.0-only"] + identifier: "biotools:gcta" + +input: + - - meta: + type: map + description: | + Groovy map containing dense GRM metadata + e.g. `[ id:'plink_simulated' ]` + `meta.id` is required and is the dense GRM basename contract used by `gcta --grm`. + Input files must therefore be staged as `.grm.id`, `.grm.bin`, and `.grm.N.bin`. + - grm_files: + type: file + description: Dense GRM sidecar files + pattern: "*.grm.{id,bin,N.bin}" + ontologies: [] + - cutoff: + type: float + description: Sparse GRM cutoff passed to `--make-bK-sparse` + +output: + sparse_grm_files: + - - meta: + type: map + description: | + Groovy map containing dense GRM metadata + e.g. `[ id:'plink_simulated' ]` + `meta.id` is preserved from the input dense GRM basename contract. + - "*_sp.grm.*": + type: file + description: Sparse GRM sidecar files + pattern: "*_sp.grm.{id,sp}" + ontologies: [] + versions_gcta: + - - "${task.process}": + type: string + description: The process the version was collected from + - "gcta": + type: string + description: The tool name + - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'": + type: eval + description: The command used to retrieve the GCTA version + +topics: + versions: + - - ${task.process}: + type: string + description: The process the version was collected from + - gcta: + type: string + description: The tool name + - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'": + type: eval + description: The command used to retrieve the GCTA version + +authors: + - "@lyh970817" +maintainers: + - "@lyh970817" diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test b/modules/nf-core/gcta/makebksparse/tests/main.nf.test new file mode 100644 index 00000000000..99e77de8f04 --- /dev/null +++ 
b/modules/nf-core/gcta/makebksparse/tests/main.nf.test
@@ -0,0 +1,131 @@
+nextflow_process {
+
+    name "Test Process GCTA_MAKEBKSPARSE"
+    script "../main.nf"
+    process "GCTA_MAKEBKSPARSE"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "gcta"
+    tag "gcta/makebksparse"
+    tag "gcta/makegrm"
+
+    setup {
+        run("GCTA_MAKEGRM", alias: "GCTA_MAKEGRM_CONTRACT") {
+            script "../../makegrm/main.nf"
+            process {
+                """
+                file('contract_dense.mbfile').text = 'plink_simulated\\n'
+
+                input[0] = [
+                    [ id:'contract_dense' ],
+                    file('contract_dense.mbfile'),
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
+                    ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true)
+                    ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
+                    ]
+                ]
+                """
+            }
+        }
+
+        run("GCTA_MAKEGRM", alias: "GCTA_MAKEGRM_STUB") {
+            script "../../makegrm/main.nf"
+            process {
+                """
+                file('stub_dense.mbfile').text = 'plink_simulated\\n'
+
+                input[0] = [
+                    [ id:'stub_dense' ],
+                    file('stub_dense.mbfile'),
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
+                    ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true)
+                    ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
+                    ]
+                ]
+                """
+            }
+        }
+    }
+
+    test("homo_sapiens popgen - create sparse GRM") {
+        config "./nextflow.config"
+
+        when {
+            process {
+                """
+                input[0] = GCTA_MAKEGRM_CONTRACT.out.grm_files
+                input[1] = Channel.value(0.05)
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.sparse_grm_files.size() == 1 },
+                { assert process.out.sparse_grm_files.get(0).get(0).id == "contract_dense" },
+                { assert process.out.sparse_grm_files.get(0).size() == 2 },
+                { assert snapshot(process.out.sparse_grm_files).match("sparse_grm_files") },
+                { assert snapshot(process.out.findAll { key, val -> key.startsWith('versions') }).match("versions") }
+            )
+        }
+    }
+
+    test("homo_sapiens popgen - fail when meta id does not match dense GRM basename") {
+        config "./nextflow.config"
+
+        when {
+            process {
+                """
+                input[0] = GCTA_MAKEGRM_CONTRACT.out.grm_files.map { meta, grm_files ->
+                    [[ id:'contract_dense_mismatch' ], grm_files]
+                }
+                input[1] = Channel.value(0.05)
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert !process.success },
+                { assert process.exitStatus != 0 }
+            )
+        }
+    }
+
+    test("homo_sapiens popgen - create sparse GRM - stub") {
+        options "-stub"
+        config "./nextflow.config"
+
+        when {
+            process {
+                """
+                input[0] = GCTA_MAKEGRM_STUB.out.grm_files
+                input[1] = Channel.value(0.05)
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.sparse_grm_files.size() == 1 },
+                { assert process.out.sparse_grm_files.get(0).get(0).id == "stub_dense" },
+                { assert process.out.sparse_grm_files.get(0).size() == 2 },
+                { assert snapshot(process.out.sparse_grm_files).match("stub_sparse_grm_files") },
+                { assert snapshot(process.out.findAll { key, val -> key.startsWith('versions') }).match("stub_versions") }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap
new file mode 100644
index 00000000000..cd4f74b2378
--- /dev/null
+++ b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap
@@ -0,0 +1,78 @@
+{
+    "stub_sparse_grm_files": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "stub_dense"
+                    },
+                    [
+                        "stub_dense_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e",
+                        "stub_dense_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.4"
+        },
+        "timestamp": "2026-05-15T21:14:25.094890591"
+    },
+    "versions": {
+        "content": [
+            {
+                "versions_gcta": [
+                    [
+                        "GCTA_MAKEBKSPARSE",
+                        "gcta",
+                        "1.94.1"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.4"
+        },
+        "timestamp": "2026-03-21T00:30:38.045354436"
+    },
+    "sparse_grm_files": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "contract_dense"
+                    },
+                    [
+                        "contract_dense_sp.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9",
+                        "contract_dense_sp.grm.sp:md5,1b78fe4b14c8690943d7687dd22ba85a"
+                    ]
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.4"
+        },
+        "timestamp": "2026-05-15T21:11:38.107540519"
+    },
+    "stub_versions": {
+        "content": [
+            {
+                "versions_gcta": [
+                    [
+                        "GCTA_MAKEBKSPARSE",
+                        "gcta",
+                        "1.94.1"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.4"
+        },
+        "timestamp": "2026-03-21T00:30:48.775770627"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/gcta/makebksparse/tests/nextflow.config b/modules/nf-core/gcta/makebksparse/tests/nextflow.config
new file mode 100644
index 00000000000..de31e021882
--- /dev/null
+++ b/modules/nf-core/gcta/makebksparse/tests/nextflow.config
@@ -0,0 +1,3 @@
+params {
+    modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/"
+}
diff --git a/modules/nf-core/gcta/makegrm/environment.yml b/modules/nf-core/gcta/makegrm/environment.yml
new file mode 100644
index 00000000000..3e22ea7b9f2
--- /dev/null
+++ b/modules/nf-core/gcta/makegrm/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::gcta=1.94.1
diff --git a/modules/nf-core/gcta/makegrm/main.nf b/modules/nf-core/gcta/makegrm/main.nf
new file mode 100644
index 00000000000..0bb78639e36
--- /dev/null
+++ b/modules/nf-core/gcta/makegrm/main.nf
@@ -0,0 +1,42 @@
+process GCTA_MAKEGRM {
+    tag "${meta.id}"
+    label 'process_medium'
+    conda "${moduleDir}/environment.yml"
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46b0d05f0daa47561d87d2a9cac5e51edc2c78e26f1bbab439c688386241a274/data'
+        : 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}"
+
+    input:
+    tuple val(meta), path(mfile), path(bed_pgen), path(bim_pvar), path(fam_psam)
+
+    output:
+    tuple val(meta), path("*.grm.*"), emit: grm_files
+    tuple val("${task.process}"), val("gcta"), eval("gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'"), emit: versions_gcta, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def extra_args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def genotype_files = bed_pgen instanceof List ? bed_pgen : [bed_pgen]
+    def genotype_extension = genotype_files[0].name.tokenize('.').last()
+    def multi_file_flag = genotype_extension == 'pgen' ? '--mpfile' : '--mbfile'
+
+    """
+
+    gcta \\
+        ${multi_file_flag} ${mfile} \\
+        --make-grm \\
+        --thread-num ${task.cpus} \\
+        --out ${prefix} ${extra_args}
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.grm.id
+    touch ${prefix}.grm.bin
+    touch ${prefix}.grm.N.bin
+    """
+}
diff --git a/modules/nf-core/gcta/makegrm/meta.yml b/modules/nf-core/gcta/makegrm/meta.yml
new file mode 100644
index 00000000000..0c813dadada
--- /dev/null
+++ b/modules/nf-core/gcta/makegrm/meta.yml
@@ -0,0 +1,91 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "gcta_makegrm"
+description: Compute a whole dense GRM with GCTA
+keywords:
+  - gcta
+  - genome-wide complex trait analysis
+  - grm
+  - genetic relationship matrix
+  - genetics
+tools:
+  - "gcta":
+      description: "GCTA is a tool for genome-wide complex trait analysis."
+      homepage: "https://yanglab.westlake.edu.cn/software/gcta/"
+      documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf"
+      tool_dev_url: "https://github.com/jianyangqt/gcta"
+      licence:
+        - "GPL-3.0-only"
+      identifier: biotools:gcta
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing GRM sample metadata
+          e.g. `[ id:'gcta_grm' ]`
+    - mfile:
+        type: file
+        description: GCTA multi-input manifest consumed by `--mbfile` or
+          `--mpfile`
+        pattern: "*.{mbfile,mpfile,txt}"
+        ontologies:
+          - edam: "http://edamontology.org/format_2330"
+    - bed_pgen:
+        type: file
+        description: Collection of PLINK primary genotype files referenced by the
+          multi-input manifest
+        pattern: "*.{bed,pgen}"
+        ontologies:
+          - edam: "http://edamontology.org/format_3003"
+    - bim_pvar:
+        type: file
+        description: Collection of PLINK variant metadata files referenced by the
+          multi-input manifest
+        pattern: "*.{bim,pvar}"
+        ontologies: []
+    - fam_psam:
+        type: file
+        description: Collection of PLINK sample metadata files referenced by the
+          multi-input manifest
+        pattern: "*.{fam,psam}"
+        ontologies: []
+
+output:
+  grm_files:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing GRM sample metadata
+            e.g. `[ id:'gcta_grm' ]`
+      - "*.grm.*":
+          type: file
+          description: Dense GRM sidecar files
+          pattern: "*.grm.{id,bin,N.bin}"
+          ontologies: []
+  versions_gcta:
+    - - ${task.process}:
+          type: string
+          description: The process the version was collected from
+      - gcta:
+          type: string
+          description: The tool name
+      - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
+          type: eval
+          description: The command used to generate the version of the tool
+
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The process the versions were collected from
+      - gcta:
+          type: string
+          description: The tool name
+      - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
+          type: eval
+          description: The command used to generate the version of the tool
+
+authors:
+  - "@lyh970817"
+maintainers:
+  - "@lyh970817"
diff --git a/modules/nf-core/gcta/makegrm/tests/main.nf.test b/modules/nf-core/gcta/makegrm/tests/main.nf.test
new file mode 100644
index 00000000000..e5c63233678
--- /dev/null
+++ b/modules/nf-core/gcta/makegrm/tests/main.nf.test
@@ -0,0 +1,142 @@
+nextflow_process {
+
+    name "Test Process GCTA_MAKEGRM"
+    script "../main.nf"
+    process "GCTA_MAKEGRM"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "gcta"
+    tag "gcta/makegrm"
+
+    test("homo_sapiens popgen - plink2") {
+        when {
+            process {
+                """
+                file('gcta_grm.mpfile').text = 'plink_simulated plink_simulated.pgen plink_simulated.psam plink_simulated.pvar\\n'
+
+                input[0] = [
+                    [ id:'gcta_grm' ],
+                    file('gcta_grm.mpfile'),
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pgen', checkIfExists: true)
+                    ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.pvar', checkIfExists: true)
+                    ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.psam', checkIfExists: true)
+                    ]
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.grm_files.size() == 1 },
+                { assert process.out.grm_files.get(0).get(0).id == 'gcta_grm' },
+                { assert process.out.grm_files.get(0).get(0).keySet() == ['id'] as Set },
+                { assert process.out.grm_files.get(0).get(1).size() == 3 },
+                {
+                    assert process.out.grm_files.get(0).get(1).collect { file(it).name }.toSet() == [
+                        'gcta_grm.grm.id',
+                        'gcta_grm.grm.bin',
+                        'gcta_grm.grm.N.bin'
+                    ] as Set
+                },
+                { assert file(path(process.out.grm_files.get(0).get(1)[0]).parent.toString() + '/.command.sh').text.contains('--make-grm') },
+                { assert file(path(process.out.grm_files.get(0).get(1)[0]).parent.toString() + '/.command.sh').text.contains('--mpfile') },
+                {
+                    assert snapshot(
+                        process.out.grm_files,
+                        process.out.findAll { key, val -> key.startsWith('versions') }
+                    ).match()
+                }
+            )
+        }
+    }
+
+    test("homo_sapiens popgen - plink1") {
+        when {
+            process {
+                """
+                file('gcta_grm.mbfile').text = 'plink_simulated\\n'
+
+                input[0] = [
+                    [ id:'gcta_grm_bed' ],
+                    file('gcta_grm.mbfile'),
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
+                    ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true)
+                    ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
+                    ]
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.grm_files.size() == 1 },
+                { assert process.out.grm_files.get(0).get(0).id == 'gcta_grm_bed' },
+                { assert process.out.grm_files.get(0).get(0).keySet() == ['id'] as Set },
+                { assert process.out.grm_files.get(0).get(1).size() == 3 },
+                {
+                    assert process.out.grm_files.get(0).get(1).collect { file(it).name }.toSet() == [
+                        'gcta_grm_bed.grm.id',
+                        'gcta_grm_bed.grm.bin',
+                        'gcta_grm_bed.grm.N.bin'
+                    ] as Set
+                },
+                { assert file(path(process.out.grm_files.get(0).get(1)[0]).parent.toString() + '/.command.sh').text.contains('--make-grm') },
+                { assert file(path(process.out.grm_files.get(0).get(1)[0]).parent.toString() + '/.command.sh').text.contains('--mbfile') },
+                {
+                    assert snapshot(
+                        process.out.grm_files,
+                        process.out.findAll { key, val -> key.startsWith('versions') }
+                    ).match()
+                }
+            )
+        }
+    }
+
+    test("homo_sapiens popgen - plink1 - stub") {
+        options "-stub"
+
+        when {
+            process {
+                """
+                file('gcta_grm.mbfile').text = 'plink_simulated\\n'
+
+                input[0] = [
+                    [ id:'gcta_grm_bed' ],
+                    file('gcta_grm.mbfile'),
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
+                    ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true)
+                    ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
+                    ]
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/gcta/makegrm/tests/main.nf.test.snap b/modules/nf-core/gcta/makegrm/tests/main.nf.test.snap
new file mode 100644
index 00000000000..f8fbe133d5a
--- /dev/null
+++ b/modules/nf-core/gcta/makegrm/tests/main.nf.test.snap
@@ -0,0 +1,111 @@
+{
+    "homo_sapiens popgen - plink2": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "gcta_grm"
+                    },
+                    [
+                        "gcta_grm.grm.N.bin:md5,acaa43bbbf2253d392537a178ecf09a4",
+                        "gcta_grm.grm.bin:md5,45f8dff14bda17d50009a21050572228",
+                        "gcta_grm.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9"
+                    ]
+                ]
+            ],
+            {
+                "versions_gcta": [
+                    [
+                        "GCTA_MAKEGRM",
+                        "gcta",
+                        "1.94.1"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.4"
+        },
+        "timestamp": "2026-05-15T21:08:43.209734458"
+    },
+    "homo_sapiens popgen - plink1": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "gcta_grm_bed"
+                    },
+                    [
+                        "gcta_grm_bed.grm.N.bin:md5,acaa43bbbf2253d392537a178ecf09a4",
+                        "gcta_grm_bed.grm.bin:md5,45f8dff14bda17d50009a21050572228",
+                        "gcta_grm_bed.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9"
+                    ]
+                ]
+            ],
+            {
+                "versions_gcta": [
+                    [
+                        "GCTA_MAKEGRM",
+                        "gcta",
+                        "1.94.1"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.4"
+        },
+        "timestamp": "2026-05-15T21:09:34.058651287"
+    },
+    "homo_sapiens popgen - plink1 - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "gcta_grm_bed"
+                        },
+                        [
+                            "gcta_grm_bed.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "gcta_grm_bed.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "gcta_grm_bed.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "1": [
+                    [
+                        "GCTA_MAKEGRM",
+                        "gcta",
+                        "1.94.1"
+                    ]
+                ],
+                "grm_files": [
+                    [
+                        {
+                            "id": "gcta_grm_bed"
+                        },
+                        [
+                            "gcta_grm_bed.grm.N.bin:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "gcta_grm_bed.grm.bin:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "gcta_grm_bed.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "versions_gcta": [
+                    [
+                        "GCTA_MAKEGRM",
+                        "gcta",
+                        "1.94.1"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.4"
+        },
+        "timestamp": "2026-05-15T21:10:21.024687128"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/gcta/makegrm/tests/nextflow.config b/modules/nf-core/gcta/makegrm/tests/nextflow.config
new file mode 100644
index 00000000000..de31e021882
--- /dev/null
+++ b/modules/nf-core/gcta/makegrm/tests/nextflow.config
@@ -0,0 +1,3 @@
+params {
+    modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/"
+}