From 6af3ab86bd6e92955dec43749b2e353a1b79d0b8 Mon Sep 17 00:00:00 2001
From: donald
Date: Thu, 26 Mar 2026 15:35:18 +0100
Subject: [PATCH 1/2] Add snpclustering subworkflow

---
 subworkflows/nf-core/snpclustering/main.nf    | 57 ++++++++++++++++++
 subworkflows/nf-core/snpclustering/meta.yml   | 58 +++++++++++++++++++
 .../nf-core/snpclustering/tests/main.nf.test  | 33 +++++++++++
 .../nf-core/snpclustering/tests/tags.yml      |  2 +
 4 files changed, 150 insertions(+)
 create mode 100644 subworkflows/nf-core/snpclustering/main.nf
 create mode 100644 subworkflows/nf-core/snpclustering/meta.yml
 create mode 100644 subworkflows/nf-core/snpclustering/tests/main.nf.test
 create mode 100644 subworkflows/nf-core/snpclustering/tests/tags.yml

diff --git a/subworkflows/nf-core/snpclustering/main.nf b/subworkflows/nf-core/snpclustering/main.nf
new file mode 100644
index 00000000000..bf7da8c76bf
--- /dev/null
+++ b/subworkflows/nf-core/snpclustering/main.nf
@@ -0,0 +1,57 @@
+#!/usr/bin/env nextflow
+nextflow.enable.dsl = 2
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    IMPORT NF-CORE MODULES
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+include { BCFTOOLS_FILTER       } from '../../../modules/nf-core/bcftools/filter/main'
+include { PLINK2_INDEP_PAIRWISE } from '../../../modules/nf-core/plink2/indeppairwise/main'
+include { PLINK2_RECODE_VCF     } from '../../../modules/nf-core/plink2/recodevcf/main'
+include { FLASHPCA2             } from '../../../modules/nf-core/flashpca2/main'
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    SUBWORKFLOW
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+//
+// Chains BCFTOOLS_FILTER -> PLINK2_INDEP_PAIRWISE -> PLINK2_RECODE_VCF -> FLASHPCA2
+// and collects the versions channel of every step.
+// Clustering and plotting are not implemented yet, so cluster_labels, metrics
+// and plots are emitted as empty placeholder channels.
+//
+workflow SNPCLUSTERING {
+    take:
+    meta       // sample metadata; declared for the interface, not used in the body yet
+    vcf        // VCF channel, joined with vcf_index before filtering
+    vcf_index  // index channel matching vcf
+    maf        // passed as second argument to BCFTOOLS_FILTER
+    missing    // passed as third argument to BCFTOOLS_FILTER
+
+    main:
+    versions = Channel.empty()
+
+    BCFTOOLS_FILTER ( vcf.join(vcf_index), maf, missing )
+    versions = versions.mix(BCFTOOLS_FILTER.out.versions.first())
+
+    PLINK2_INDEP_PAIRWISE ( BCFTOOLS_FILTER.out.vcf )
+    versions = versions.mix(PLINK2_INDEP_PAIRWISE.out.versions.first())
+
+    PLINK2_RECODE_VCF ( PLINK2_INDEP_PAIRWISE.out.pgen )
+    versions = versions.mix(PLINK2_RECODE_VCF.out.versions.first())
+
+    FLASHPCA2 ( PLINK2_RECODE_VCF.out.vcf )
+    versions = versions.mix(FLASHPCA2.out.versions.first())
+
+    // TODO: add KMeans/DBSCAN/plot steps here once the local modules exist
+
+    emit:
+    cluster_labels = Channel.empty() // placeholder
+    metrics        = Channel.empty() // placeholder
+    plots          = Channel.empty() // placeholder
+    versions       = versions
+}
diff --git a/subworkflows/nf-core/snpclustering/meta.yml b/subworkflows/nf-core/snpclustering/meta.yml
new file mode 100644
index 00000000000..286c9e23da0
--- /dev/null
+++ b/subworkflows/nf-core/snpclustering/meta.yml
@@ -0,0 +1,58 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/nf-core/meta-schema.json
+
+name: "snpclustering"
+description: "End-to-end unsupervised clustering of genomic samples starting from multi-sample VCF files. Performs variant filtering (MAF + missingness), optional LD pruning, PCA (FlashPCA2 or IncrementalPCA), KMeans/DBSCAN clustering and internal validation."
+keywords:
+  - genomics
+  - clustering
+  - unsupervised clustering
+  - VCF
+  - nf-core
+authors:
+  - "Donald Baku (@dbaku42)"
+components:
+  - bcftools/filter
+  - plink2/indeppairwise
+  - plink2/recodevcf
+  - flashpca2
+input:
+  - meta:
+      type: map
+      description: "Groovy Map containing sample metadata"
+  - vcf:
+      type: file
+      description: "Multi-sample VCF file (bgzipped and indexed)"
+      pattern: "*.{vcf,vcf.gz}"
+  - vcf_index:
+      type: file
+      description: "Index of the VCF file (.tbi or .csi)"
+      pattern: "*.{tbi,csi}"
+  - maf:
+      type: float
+      description: "Minimum minor allele frequency threshold"
+      default: 0.01
+  - missing:
+      type: float
+      description: "Maximum missingness threshold"
+      default: 0.10
+output:
+  - meta:
+      type: map
+      description: "Groovy Map containing sample metadata"
+  - cluster_labels:
+      type: file
+      description: "CSV file with per-sample cluster assignments"
+      pattern: "cluster_labels.csv"
+  - metrics:
+      type: file
+      description: "Table with all cluster quality metrics"
+      pattern: "*_metrics.tsv"
+  - plots:
+      type: file
+      description: "Directory containing publication-ready plots"
+      pattern: "plots/"
+  - versions:
+      type: file
+      description: "File containing versions of all tools used"
+      pattern: "versions.yml"
diff --git a/subworkflows/nf-core/snpclustering/tests/main.nf.test b/subworkflows/nf-core/snpclustering/tests/main.nf.test
new file mode 100644
index 00000000000..2c5450dc434
--- /dev/null
+++ b/subworkflows/nf-core/snpclustering/tests/main.nf.test
@@ -0,0 +1,33 @@
+nextflow_workflow {
+
+    name "Test Workflow SNPCLUSTERING"
+    script "../main.nf"
+    workflow "SNPCLUSTERING"
+
+    tag "subworkflows"
+    tag "subworkflows_nfcore"
+    tag "subworkflows/snpclustering"
+    tag "bcftools/filter"
+    tag "plink2/indeppairwise"
+    tag "plink2/recodevcf"
+    tag "flashpca2"
+
+    test("vcf.gz input") {
+
+        when {
+            workflow {
+                """
+                input[0] = [ id:'test' ]
+                input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/test.vcf.gz', checkIfExists: true)
+                input[2] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)
+                input[3] = 0.01
+                input[4] = 0.10
+                """
+            }
+        }
+
+        then {
+            assert workflow.success
+        }
+    }
+}
diff --git a/subworkflows/nf-core/snpclustering/tests/tags.yml b/subworkflows/nf-core/snpclustering/tests/tags.yml
new file mode 100644
index 00000000000..ed287abe7e5
--- /dev/null
+++ b/subworkflows/nf-core/snpclustering/tests/tags.yml
@@ -0,0 +1,2 @@
+subworkflows/snpclustering:
+  - subworkflows/nf-core/snpclustering/**

From 8aac674665bf6f44eef19b1b8952adc31fb1c001 Mon Sep 17 00:00:00 2001
From: Donald Baku <141358602+dbaku42@users.noreply.github.com>
Date: Fri, 3 Apr 2026 15:46:00 +0200
Subject: [PATCH 2/2] Update subworkflows/nf-core/snpclustering/meta.yml
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Famke Bäuerle <45968370+famosab@users.noreply.github.com>
---
 subworkflows/nf-core/snpclustering/meta.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subworkflows/nf-core/snpclustering/meta.yml b/subworkflows/nf-core/snpclustering/meta.yml
index 286c9e23da0..ae8599a6fcb 100644
--- a/subworkflows/nf-core/snpclustering/meta.yml
+++ b/subworkflows/nf-core/snpclustering/meta.yml
@@ -23,7 +23,7 @@ input:
   - vcf:
       type: file
       description: "Multi-sample VCF file (bgzipped and indexed)"
-      pattern: "*.{vcf,vcf.gz}"
+      pattern: "*.vcf.gz"
   - vcf_index:
       type: file
       description: "Index of the VCF file (.tbi or .csi)"