nf-core · lyh970817 · Mar 20, 2026 · Mar 21, 2026 · Mar 21, 2026 · Mar 21, 2026
diff --git a/modules/nf-core/gcta/makebksparse/environment.yml b/modules/nf-core/gcta/makebksparse/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::gcta=1.94.1  # exact version pin from bioconda; the container tag in main.nf carries the same 1.94.1
diff --git a/modules/nf-core/gcta/makebksparse/main.nf b/modules/nf-core/gcta/makebksparse/main.nf
@@ -0,0 +1,40 @@
+process GCTA_MAKEBKSPARSE {  // Runs gcta --make-bK-sparse on a dense GRM and writes the <prefix>_sp.grm.* files
+    tag "${meta.id}"
+    label 'process_medium'
+    conda "${moduleDir}/environment.yml"
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46b0d05f0daa47561d87d2a9cac5e51edc2c78e26f1bbab439c688386241a274/data'
+        : 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}"
+
+    input:
+    tuple val(meta), path(grm_files)
+    val cutoff
+
+    output:
+    tuple val(meta), path("*_sp.grm.*"), emit: sparse_grm_files
+    tuple val("${task.process}"), val("gcta"), eval("gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'"), emit: versions_gcta, topic: versions
+
+    when:  // the task runs unless ext.when is set to a value that evaluates to false
+    task.ext.when == null || task.ext.when
+
+    script:  // meta.id is passed to --grm as the dense GRM basename, so grm_files must be staged as <meta.id>.grm.*
+    def extra_args = task.ext.args ?: ''  // optional extra gcta options from ext.args, placed last on the command line
+    def prefix = task.ext.prefix ?: "${meta.id}"  // result basename, defaults to meta.id; the command appends _sp
+
+    """
+
+    gcta \\
+        --grm ${meta.id} \\
+        --make-bK-sparse ${cutoff} \\
+        --out ${prefix}_sp \\
+        --thread-num ${task.cpus} \\
+        ${extra_args}
+    """
+
+    stub:  // creates empty <prefix>_sp.grm.id and <prefix>_sp.grm.sp placeholders instead of running the sparse-GRM command
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}_sp.grm.id
+    touch ${prefix}_sp.grm.sp
+    """
+}
diff --git a/modules/nf-core/gcta/makebksparse/meta.yml b/modules/nf-core/gcta/makebksparse/meta.yml
@@ -0,0 +1,76 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "gcta_makebksparse"
+description: Create a sparse GRM from a dense GRM for downstream fastGWA analyses
+keywords:
+  - gcta
+  - genome-wide complex trait analysis
+  - grm
+  - genetic relationship matrix
+  - sparse
+  - genetics
+tools:
+  - "gcta":
+      description: "Genome-wide Complex Trait Analysis (GCTA) estimates genetic relationships, variance components, and association statistics from genome-wide data."
+      homepage: "https://yanglab.westlake.edu.cn/software/gcta/"
+      documentation: "https://yanglab.westlake.edu.cn/software/gcta/static/gcta_doc_latest.pdf"
+      tool_dev_url: "https://yanglab.westlake.edu.cn/software/gcta/"
+      licence: ["GPL-3.0-only"]
+      identifier: "biotools:gcta"
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy map containing dense GRM metadata
+          e.g. `[ id:'plink_simulated' ]`
+          `meta.id` is required and must equal the basename of the dense GRM, because it is passed directly to `gcta --grm`.
+          Input files must therefore be staged as `<meta.id>.grm.id`, `<meta.id>.grm.bin`, and `<meta.id>.grm.N.bin`.
+    - grm_files:
+        type: file
+        description: Dense GRM sidecar files
+        pattern: "*.grm.{id,bin,N.bin}"
+        ontologies: []
+  - cutoff:
+      type: float
+      description: Sparse GRM cutoff passed to `--make-bK-sparse`
+
+output:
+  sparse_grm_files:
+    - - meta:
+          type: map
+          description: |
+            Groovy map containing dense GRM metadata
+            e.g. `[ id:'plink_simulated' ]`
+            `meta.id` is passed through unchanged from the input, so it still equals the dense GRM basename.
+      - "*_sp.grm.*":
+          type: file
+          description: Sparse GRM sidecar files
+          pattern: "*_sp.grm.{id,sp}"
+          ontologies: []
+  versions_gcta:
+    - - "${task.process}":
+          type: string
+          description: The process the version was collected from
+      - "gcta":
+          type: string
+          description: The tool name
+      - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
+          type: eval
+          description: The command used to retrieve the GCTA version
+
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The process the version was collected from
+      - gcta:
+          type: string
+          description: The tool name
+      - "gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'":
+          type: eval
+          description: The command used to retrieve the GCTA version
+
+authors:
+  - "@lyh970817"
+maintainers:
+  - "@lyh970817"
diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test b/modules/nf-core/gcta/makebksparse/tests/main.nf.test
@@ -0,0 +1,129 @@
+nextflow_process {
+
+    name "Test Process GCTA_MAKEBKSPARSE"
+    script "../main.nf"
+    process "GCTA_MAKEBKSPARSE"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "gcta"
+    tag "gcta/makebksparse"
+    tag "gcta/makegrm"
+
+    setup {  // both dense-GRM fixtures come from GCTA_MAKEGRM on the same plink_simulated bed/bim/fam; they differ only in meta.id and mbfile name
+        run("GCTA_MAKEGRM", alias: "GCTA_MAKEGRM_CONTRACT") {  // fixture used by the real-run test and the mismatch test
+            script "../../makegrm/main.nf"
+            process {
+                """
+                file('contract_dense.mbfile').text = 'plink_simulated\\n'
+
+                input[0] = [
+                    [ id:'contract_dense' ],
+                    file('contract_dense.mbfile'),
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
+                    ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true)
+                    ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
+                    ]
+                ]
+                """
+            }
+        }
+
+        run("GCTA_MAKEGRM", alias: "GCTA_MAKEGRM_STUB") {  // fixture used by the stub test
+            script "../../makegrm/main.nf"
+            process {
+                """
+                file('stub_dense.mbfile').text = 'plink_simulated\\n'
+
+                input[0] = [
+                    [ id:'stub_dense' ],
+                    file('stub_dense.mbfile'),
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bed', checkIfExists: true)
+                    ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.bim', checkIfExists: true)
+                    ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/plink_simulated.fam', checkIfExists: true)
+                    ]
+                ]
+                """
+            }
+        }
+    }
+
+    test("homo_sapiens popgen - create sparse GRM") {  // real run with cutoff 0.05; snapshots the sparse GRM files and the version tuple
+        config "./nextflow.config"
+
+        when {
+            process {
+                """
+                input[0] = GCTA_MAKEGRM_CONTRACT.out.grm_files
+                input[1] = Channel.value(0.05)
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.sparse_grm_files.size() == 1 },
+                { assert process.out.sparse_grm_files.get(0).get(0).id == "contract_dense" },  // meta.id comes through unchanged
+                { assert snapshot(process.out.sparse_grm_files).match("sparse_grm_files") },
+                { assert snapshot(process.out.findAll { key, val -> key.startsWith('versions') }).match("versions") }
+            )
+        }
+    }
+
+    test("homo_sapiens popgen - fail when meta id does not match dense GRM basename") {  // swaps in a meta.id that no staged GRM file is named after
+        config "./nextflow.config"
+
+        when {
+            process {
+                """
+                input[0] = GCTA_MAKEGRM_CONTRACT.out.grm_files.map { meta, grm_files ->
+                    [[ id:'contract_dense_mismatch' ], grm_files]
+                }
+                input[1] = Channel.value(0.05)
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert !process.success },
+                { assert process.exitStatus != 0 }
+            )
+        }
+    }
+
+    test("homo_sapiens popgen - create sparse GRM - stub") {  // same inputs in stub mode; the snapshot holds empty-file checksums
+        options "-stub"
+        config "./nextflow.config"
+
+        when {
+            process {
+                """
+                input[0] = GCTA_MAKEGRM_STUB.out.grm_files
+                input[1] = Channel.value(0.05)
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.sparse_grm_files.size() == 1 },
+                { assert process.out.sparse_grm_files.get(0).get(0).id == "stub_dense" },
+                { assert snapshot(process.out.sparse_grm_files).match("stub_sparse_grm_files") },
+                { assert snapshot(process.out.findAll { key, val -> key.startsWith('versions') }).match("stub_versions") }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap b/modules/nf-core/gcta/makebksparse/tests/main.nf.test.snap
@@ -0,0 +1,78 @@
+{
+    "stub_sparse_grm_files": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "stub_dense"
+                    },
+                    [
+                        "stub_dense_sp.grm.id:md5,d41d8cd98f00b204e9800998ecf8427e",
+                        "stub_dense_sp.grm.sp:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.4"
+        },
+        "timestamp": "2026-05-15T21:12:21.136559698"
+    },
+    "versions": {
+        "content": [
+            {
+                "versions_gcta": [
+                    [
+                        "GCTA_MAKEBKSPARSE",
+                        "gcta",
+                        "1.94.1"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.4"
+        },
+        "timestamp": "2026-03-21T00:30:38.045354436"
+    },
+    "sparse_grm_files": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "contract_dense"
+                    },
+                    [
+                        "contract_dense_sp.grm.id:md5,4f9aa36c44a417ff6d7caa9841e66ad9",
+                        "contract_dense_sp.grm.sp:md5,1b78fe4b14c8690943d7687dd22ba85a"
+                    ]
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.4"
+        },
+        "timestamp": "2026-05-15T21:09:25.501833656"
+    },
+    "stub_versions": {
+        "content": [
+            {
+                "versions_gcta": [
+                    [
+                        "GCTA_MAKEBKSPARSE",
+                        "gcta",
+                        "1.94.1"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.4"
+        },
+        "timestamp": "2026-03-21T00:30:48.775770627"
+    }
+}
diff --git a/modules/nf-core/gcta/makebksparse/tests/nextflow.config b/modules/nf-core/gcta/makebksparse/tests/nextflow.config
@@ -0,0 +1,3 @@
+params {
+    modules_testdata_base_path = System.getenv("NF_MODULES_TESTDATA_BASE_PATH") ?: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/"  // env var wins when set and non-empty; otherwise the hosted test-datasets URL
+}
diff --git a/modules/nf-core/gcta/makegrm/environment.yml b/modules/nf-core/gcta/makegrm/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::gcta=1.94.1  # exact version pin from bioconda; the container tag in main.nf carries the same 1.94.1
diff --git a/modules/nf-core/gcta/makegrm/main.nf b/modules/nf-core/gcta/makegrm/main.nf
@@ -0,0 +1,42 @@
+process GCTA_MAKEGRM {  // Runs gcta --make-grm with mfile supplied through --mbfile or --mpfile and writes results under --out <prefix>
+    tag "${meta.id}"
+    label 'process_medium'
+    conda "${moduleDir}/environment.yml"
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46b0d05f0daa47561d87d2a9cac5e51edc2c78e26f1bbab439c688386241a274/data'
+        : 'community.wave.seqera.io/library/gcta:1.94.1--9bc35dc424fcf6e9'}"
+
+    input:
+    tuple val(meta), path(mfile), path(bed_pgen), path(bim_pvar), path(fam_psam)
+
+    output:
+    tuple val(meta), path("*.grm.*"), emit: grm_files
+    tuple val("${task.process}"), val("gcta"), eval("gcta --version | sed -En 's/^[*] version v([0-9.]*).*/\\1/p'"), emit: versions_gcta, topic: versions
+
+    when:  // the task runs unless ext.when is set to a value that evaluates to false
+    task.ext.when == null || task.ext.when
+
+    script:  // bim_pvar and fam_psam are staged but never referenced below; only the first bed_pgen file name is inspected
+    def extra_args = task.ext.args ?: ''  // optional extra gcta options from ext.args, placed after --out
+    def prefix = task.ext.prefix ?: "${meta.id}"  // result basename, defaults to meta.id
+    def genotype_files = bed_pgen instanceof List ? bed_pgen : [bed_pgen]  // wrap a single file so indexing works either way
+    def genotype_extension = genotype_files[0].name.tokenize('.').last()  // text after the last dot of the first genotype file name
+    def multi_file_flag = genotype_extension == 'pgen' ? '--mpfile' : '--mbfile'  // pgen selects --mpfile; anything else selects --mbfile
+
+    """
+
+    gcta \\
+        ${multi_file_flag} ${mfile} \\
+        --make-grm \\
+        --thread-num ${task.cpus} \\
+        --out ${prefix} ${extra_args}
+    """
+
+    stub:  // creates empty <prefix>.grm.id, .grm.bin and .grm.N.bin placeholders instead of running the GRM command
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.grm.id
+    touch ${prefix}.grm.bin
+    touch ${prefix}.grm.N.bin
+    """
+}