From 9abde152a6c70d636fd5db0c69e6775e0957cb4f Mon Sep 17 00:00:00 2001
From: dbaku42 <dbaku42@gmail.com>
Date: Wed, 29 Apr 2026 15:23:30 +0200
Subject: [PATCH 01/38] New modules: cluster_metrics + cluster_viz

- cluster_metrics: computes clustering quality metrics + k-sweep
- cluster_viz: generates PCA, UMAP and t-SNE plots colored by cluster
- Both use conda environment.yml
- Full nf-test coverage
---
 .../nf-core/cluster_metrics/environment.yml   |  10 +
 modules/nf-core/cluster_metrics/main.nf       |  50 ++++
 modules/nf-core/cluster_metrics/meta.yml      |  62 ++++
 .../templates/cluster_metrics.py              | 276 ++++++++++++++++++
 .../tests/data/test_clusters.csv              |   6 +
 .../tests/data/test_features.tsv              |   6 +
 .../cluster_metrics/tests/main.nf.test        |  44 +++
 .../cluster_metrics/tests/main.nf.test.snap   | 129 ++++++++
 modules/nf-core/cluster_viz/environment.yml   |  11 +
 modules/nf-core/cluster_viz/main.nf           |  55 ++++
 modules/nf-core/cluster_viz/meta.yml          | 105 +++++++
 .../cluster_viz/templates/cluster_viz.py      | 213 ++++++++++++++
 .../cluster_viz/tests/data/test_clusters.csv  |   6 +
 .../cluster_viz/tests/data/test_features.tsv  |   6 +
 .../cluster_viz/tests/data/test_pca.eigenvec  |   6 +
 .../nf-core/cluster_viz/tests/main.nf.test    |  52 ++++
 .../cluster_viz/tests/main.nf.test.snap       | 151 ++++++++++
 17 files changed, 1188 insertions(+)
 create mode 100644 modules/nf-core/cluster_metrics/environment.yml
 create mode 100644 modules/nf-core/cluster_metrics/main.nf
 create mode 100644 modules/nf-core/cluster_metrics/meta.yml
 create mode 100644 modules/nf-core/cluster_metrics/templates/cluster_metrics.py
 create mode 100644 modules/nf-core/cluster_metrics/tests/data/test_clusters.csv
 create mode 100644 modules/nf-core/cluster_metrics/tests/data/test_features.tsv
 create mode 100644 modules/nf-core/cluster_metrics/tests/main.nf.test
 create mode 100644 modules/nf-core/cluster_metrics/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/cluster_viz/environment.yml
 create mode 100644 modules/nf-core/cluster_viz/main.nf
 create mode 100644 modules/nf-core/cluster_viz/meta.yml
 create mode 100644 modules/nf-core/cluster_viz/templates/cluster_viz.py
 create mode 100644 modules/nf-core/cluster_viz/tests/data/test_clusters.csv
 create mode 100644 modules/nf-core/cluster_viz/tests/data/test_features.tsv
 create mode 100644 modules/nf-core/cluster_viz/tests/data/test_pca.eigenvec
 create mode 100644 modules/nf-core/cluster_viz/tests/main.nf.test
 create mode 100644 modules/nf-core/cluster_viz/tests/main.nf.test.snap

diff --git a/modules/nf-core/cluster_metrics/environment.yml b/modules/nf-core/cluster_metrics/environment.yml
new file mode 100644
index 000000000000..616821c92ff9
--- /dev/null
+++ b/modules/nf-core/cluster_metrics/environment.yml
@@ -0,0 +1,10 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - conda-forge::matplotlib=3.9.*
+  - conda-forge::pandas=2.2.*
+  - conda-forge::python=3.12
+  - conda-forge::scikit-learn=1.5.*
diff --git a/modules/nf-core/cluster_metrics/main.nf b/modules/nf-core/cluster_metrics/main.nf
new file mode 100644
index 000000000000..001f5479d85d
--- /dev/null
+++ b/modules/nf-core/cluster_metrics/main.nf
@@ -0,0 +1,50 @@
+#!/usr/bin/env nextflow
+nextflow.enable.dsl = 2
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    PROCESS: CLUSTER_METRICS
+    Compute clustering quality metrics and k-sweep
+    Author: Donald Baku (author)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+process CLUSTER_METRICS {
+    tag "$meta.id"
+    label 'process_medium'
+    conda "${moduleDir}/environment.yml"
+
+    input:
+    tuple val(meta), path(features), path(clusters)
+    val out_prefix  // output file prefix, used unless task.ext.prefix is set
+
+    output:
+    tuple val(meta), path("*_metrics.tsv")     , emit: metrics
+    tuple val(meta), path("*_k_sweep.csv")     , emit: k_sweep
+    tuple val(meta), path("*_selected.json")   , emit: selected
+    tuple val(meta), path("*.png")             , emit: plots, optional: true
+    path "versions.yml"                        , emit: versions, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def prefix = task.ext.prefix ?: out_prefix ?: "${meta.id}"
+    // Resolve the bundled script via moduleDir so it is found wherever the module is installed
+    """
+    python3 ${moduleDir}/templates/cluster_metrics.py \\
+        --features ${features} \\
+        --clusters ${clusters} \\
+        --out-k-sweep ${prefix}_k_sweep.csv \\
+        --out-selected ${prefix}_selected.json \\
+        --out-prefix ${prefix}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python3 --version | cut -d' ' -f2)
+        pandas: \$(python3 -c "import pandas; print(pandas.__version__)")
+        scikit-learn: \$(python3 -c "import sklearn; print(sklearn.__version__)")
+        matplotlib: \$(python3 -c "import matplotlib; print(matplotlib.__version__)")
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/cluster_metrics/meta.yml b/modules/nf-core/cluster_metrics/meta.yml
new file mode 100644
index 000000000000..0c55de377200
--- /dev/null
+++ b/modules/nf-core/cluster_metrics/meta.yml
@@ -0,0 +1,62 @@
+name: "cluster_metrics"
+description: "Computes clustering quality metrics (silhouette, Calinski-Harabasz, Davies-Bouldin) and performs k-sweep analysis"
+keywords:
+  - clustering
+  - metrics
+  - silhouette
+  - calinski-harabasz
+  - davies-bouldin
+  - evaluation
+
+tools:
+  - "scikit-learn":
+      description: "Machine learning library for clustering metrics"
+      homepage: "https://scikit-learn.org/"
+      documentation: "https://scikit-learn.org/stable/modules/clustering.html"
+      licence: ["BSD-3-Clause"]
+
+authors:
+  - "@dbaku42"
+maintainers:
+  - "@dbaku42"
+
+input:
+  - meta:
+      type: map
+      description: Groovy Map containing sample information
+  - features:
+      type: file
+      description: TSV file with sample_id and numeric features (e.g. PCA scores)
+      pattern: "*.tsv"
+  - clusters:
+      type: file
+      description: CSV file (comma-separated) with sample_id and cluster assignment
+      pattern: "*_clusters.*"
+  - out_prefix:
+      type: string
+      description: Prefix for output files
+
+output:
+  - meta:
+      type: map
+      description: Groovy Map containing sample information
+  - metrics:
+      type: file
+      description: TSV with selected cluster quality metrics
+      pattern: "*_metrics.tsv"
+  - k_sweep:
+      type: file
+      description: CSV with metrics for different values of k
+      pattern: "*_k_sweep.csv"
+  - selected:
+      type: file
+      description: JSON with quality metrics of the supplied clustering and input/alignment metadata
+      pattern: "*_selected.json"
+  - plots:
+      type: file
+      description: Optional PNG plots (elbow, silhouette, etc.)
+      pattern: "*.png"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
diff --git a/modules/nf-core/cluster_metrics/templates/cluster_metrics.py b/modules/nf-core/cluster_metrics/templates/cluster_metrics.py
new file mode 100644
index 000000000000..93da1f6a03d0
--- /dev/null
+++ b/modules/nf-core/cluster_metrics/templates/cluster_metrics.py
@@ -0,0 +1,276 @@
+#!/usr/bin/env python3
+
+import argparse
+import json
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+from sklearn.cluster import KMeans
+from sklearn.metrics import (
+    silhouette_score,
+    calinski_harabasz_score,
+    davies_bouldin_score,
+)
+
+import matplotlib
+matplotlib.use("Agg")
+
+
+def _normalise_id_column(df: pd.DataFrame) -> pd.DataFrame:
+    """Return a copy of ``df`` whose sample identifier column is ``sample_id``.
+
+    Leading '#' is stripped from headers, rows repeating the FID/IID header are
+    dropped, and the ID column is taken from ``sample_id`` or ``IID``
+    (case-insensitive); any ``FID`` column is removed on the IID path.
+
+    Raises:
+        ValueError: if no ID column can be identified.
+    """
+    frame = df.copy()
+    frame.columns = [str(name).lstrip("#") for name in frame.columns]
+    by_upper = {str(name).upper(): name for name in frame.columns}
+    iid = by_upper.get("IID")
+
+    if iid is not None:
+        # A header line repeated inside the data shows up as IID == "FID"/"IID".
+        repeated = frame[iid].astype(str).str.upper().isin({"FID", "IID"})
+        if repeated.any():
+            frame = frame.loc[~repeated].copy().reset_index(drop=True)
+
+    if "SAMPLE_ID" in by_upper:
+        current = by_upper["SAMPLE_ID"]
+        return frame if current == "sample_id" else frame.rename(columns={current: "sample_id"})
+
+    if iid is None:
+        raise ValueError(
+            f"Cannot find sample ID column (expected 'sample_id' or 'IID'). "
+            f"Found: {list(frame.columns)}"
+        )
+
+    if pd.to_numeric(frame[iid], errors="coerce").notna().all():
+        # All-numeric IIDs: drop IID and use the first remaining column instead.
+        frame = frame.drop(columns=[iid])
+        if len(frame.columns) == 0:
+            raise ValueError("Cannot infer sample_id after dropping numeric IID column")
+        frame = frame.rename(columns={frame.columns[0]: "sample_id"})
+    else:
+        frame = frame.rename(columns={iid: "sample_id"})
+
+    leftover_fid = [name for name in frame.columns if str(name).upper() == "FID"]
+    return frame.drop(columns=leftover_fid) if leftover_fid else frame
+
+
+def load_features(path: str) -> tuple[pd.DataFrame, pd.Series]:
+    """Read a tab-separated feature table; return (numeric matrix, sample IDs).
+
+    Unparseable cells become NaN, then take the column mean, or 0.0 if the
+    whole column is NaN.
+    """
+    table = _normalise_id_column(pd.read_csv(path, sep="\t", dtype=str))
+    if "sample_id" not in table.columns:
+        raise ValueError("features file must contain a sample_id column after normalization")
+    ids = table["sample_id"].astype(str)
+    numeric = table.drop(columns=["sample_id"]).apply(pd.to_numeric, errors="coerce")
+    imputed = numeric.fillna(numeric.mean(numeric_only=True))
+    return imputed.fillna(0.0), ids
+
+
+def _looks_mostly_numeric(s: pd.Series) -> bool:
+    """True when >= 80% of the values parse as numbers; False for an empty series."""
+    if s.empty:
+        return False
+    return float(pd.to_numeric(s.astype(str), errors="coerce").notna().mean()) >= 0.8
+
+
+def load_clusters(path: str) -> tuple[pd.DataFrame, str]:
+    """Read a comma-separated cluster table and locate its sample-ID column.
+
+    Returns ``(table, mode)``. With ``mode == "sample_id"`` the table has
+    ``sample_id`` and integer ``cluster`` columns; with ``mode == "row_order"``
+    it has only ``cluster`` and rows must be matched positionally.
+
+    Raises:
+        ValueError: if the 'cluster' column is missing or holds non-numeric values.
+    """
+
+    def _as_int(values: pd.Series) -> pd.Series:
+        return pd.to_numeric(values, errors="raise").astype(int)
+
+    raw = pd.read_csv(path, sep=",", dtype=str).copy()
+    raw.columns = [str(name).lstrip("#") for name in raw.columns]
+    by_upper = {str(name).upper(): name for name in raw.columns}
+
+    if "CLUSTER" not in by_upper:
+        raise ValueError("clusters CSV must have a 'cluster' column")
+    cluster_col = by_upper["CLUSTER"]
+
+    # 1) An explicit sample_id column (any letter case) wins.
+    if "SAMPLE_ID" in by_upper:
+        table = raw[[by_upper["SAMPLE_ID"], cluster_col]].copy()
+        table.columns = ["sample_id", "cluster"]
+        table["sample_id"] = table["sample_id"].astype(str)
+        table["cluster"] = _as_int(table["cluster"])
+        return table, "sample_id"
+
+    # 2) Otherwise try the IID/FID conventions; any failure falls through to step 3.
+    try:
+        normalised = _normalise_id_column(raw.copy())
+        if "sample_id" in normalised.columns and "cluster" in normalised.columns:
+            table = normalised[["sample_id", "cluster"]].copy()
+            table["sample_id"] = table["sample_id"].astype(str)
+            table["cluster"] = _as_int(table["cluster"])
+            return table, "sample_id"
+    except Exception:
+        pass
+
+    # 3) A single, mostly non-numeric companion column is taken as the ID column.
+    companions = [name for name in raw.columns if name != cluster_col]
+    if len(companions) == 1 and not _looks_mostly_numeric(raw[companions[0]].astype(str)):
+        ids = raw[companions[0]].astype(str)
+        return pd.DataFrame({"sample_id": ids, "cluster": _as_int(raw[cluster_col])}), "sample_id"
+    return pd.DataFrame({"cluster": _as_int(raw[cluster_col])}), "row_order"
+
+
+def safe_cluster_metrics(X: np.ndarray, labels: np.ndarray) -> dict:
+    """Score a clustering, returning None for metrics that are undefined.
+
+    Points labelled -1 are treated as noise and excluded. The scikit-learn
+    scores require 2 <= n_labels <= n_samples - 1, so every other case
+    (including one cluster per sample) yields None instead of raising.
+
+    Args:
+        X: feature matrix, indexed row-wise by the same mask as ``labels``.
+        labels: cluster label per row of ``X``; -1 marks noise.
+
+    Returns:
+        dict with ``n_clusters`` (noise excluded), ``silhouette``,
+        ``calinski_harabasz`` and ``davies_bouldin``.
+    """
+    keep = labels != -1
+    X_use, y_use = X[keep], labels[keep]
+    n_clusters = len(np.unique(y_use))
+    result = {
+        "n_clusters": int(n_clusters),
+        "silhouette": None,
+        "calinski_harabasz": None,
+        "davies_bouldin": None,
+    }
+    if 2 <= n_clusters < len(X_use):
+        result["silhouette"] = float(silhouette_score(X_use, y_use))
+        result["calinski_harabasz"] = float(calinski_harabasz_score(X_use, y_use))
+        result["davies_bouldin"] = float(davies_bouldin_score(X_use, y_use))
+    return result
+
+
+def main() -> None:
+    """Score the supplied clustering, run a KMeans k-sweep and write the
+    metrics TSV, selected JSON, k-sweep CSV and best-effort PNG curves."""
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--features", required=True)
+    ap.add_argument("--clusters", required=True)
+    ap.add_argument("--k-min", type=int, default=2)
+    ap.add_argument("--k-max", type=int, default=12)
+    ap.add_argument("--out-k-sweep", required=True)
+    ap.add_argument("--out-selected", required=True)
+    ap.add_argument("--out-prefix", required=True)
+    args = ap.parse_args()
+
+    X_df, sample_ids = load_features(args.features)
+    clusters_df, cluster_mode = load_clusters(args.clusters)
+    if cluster_mode == "sample_id":
+        clusters = clusters_df.set_index("sample_id")["cluster"]
+        common = sample_ids[sample_ids.isin(clusters.index)]
+        if len(common) > 0:
+            X = X_df.loc[common.index].values
+            labels = clusters.loc[common.values].values
+            aligned_ids = common.astype(str).tolist()
+            alignment_mode = "sample_id"
+        elif len(clusters_df) == len(sample_ids):  # no shared IDs, equal lengths: pair rows by position
+            X = X_df.values
+            labels = clusters_df["cluster"].values
+            aligned_ids = sample_ids.astype(str).tolist()
+            alignment_mode = "row_order_fallback"
+        else:
+            raise ValueError(
+                f"No overlapping sample_id between features and clusters.\n"
+                f"  features IDs (first 5): {sample_ids.head().tolist()}\n"
+                f"  clusters IDs (first 5): {list(clusters.index[:5])}"
+            )
+    else:
+        if len(clusters_df) != len(sample_ids):
+            raise ValueError(
+                "clusters CSV has no usable sample_id column and row counts do not match.\n"
+                f"  n_features={len(sample_ids)}\n"
+                f"  n_clusters={len(clusters_df)}"
+            )
+        X = X_df.values
+        labels = clusters_df["cluster"].values
+        aligned_ids = sample_ids.astype(str).tolist()
+        alignment_mode = "row_order"
+
+    if len(X) < 2:
+        raise ValueError("Need at least 2 samples to compute cluster metrics")
+
+    selected = safe_cluster_metrics(X, labels)
+    selected["input_clusters"] = Path(args.clusters).name
+    selected["input_features"] = Path(args.features).name
+    selected["n_samples_used"] = int(len(aligned_ids))
+    selected["alignment_mode"] = alignment_mode
+    metrics_tsv = f"{args.out_prefix}_metrics.tsv"
+    pd.DataFrame([selected]).to_csv(metrics_tsv, sep="\t", index=False)
+
+    rows = []
+    max_k = min(int(args.k_max), len(X))
+    for k in range(int(args.k_min), max_k + 1):
+        model = KMeans(n_clusters=k, n_init="auto", random_state=42)
+        y = model.fit_predict(X)
+        sil = ch = db = None
+        # The scores are only defined for 2 <= n_labels <= n_samples - 1.
+        if 1 < len(np.unique(y)) < len(X):
+            sil = float(silhouette_score(X, y))
+            ch = float(calinski_harabasz_score(X, y))
+            db = float(davies_bouldin_score(X, y))
+        rows.append({
+            "k": k,
+            "inertia": float(model.inertia_),
+            "silhouette": sil,
+            "calinski_harabasz": ch,
+            "davies_bouldin": db,
+        })
+
+    # Explicit columns keep the CSV header even when the k range is empty.
+    sweep_columns = ["k", "inertia", "silhouette", "calinski_harabasz", "davies_bouldin"]
+    sweep_df = pd.DataFrame(rows, columns=sweep_columns)
+    sweep_df.to_csv(args.out_k_sweep, sep=",", index=False)
+    Path(args.out_selected).write_text(json.dumps(selected, indent=2))
+
+    pfx = args.out_prefix
+    try:
+        import matplotlib.pyplot as plt
+
+        def plot_curve(metric, title, ylabel, out_png):
+            plt.figure(figsize=(7, 4.5))
+            vals = sweep_df[metric].dropna()
+            ks = sweep_df.loc[vals.index, "k"]
+            plt.plot(ks, vals, marker="o")
+            plt.xticks(sweep_df["k"].tolist())
+            plt.title(title)
+            plt.xlabel("k")
+            plt.ylabel(ylabel)
+            plt.tight_layout()
+            plt.savefig(out_png, dpi=200)
+            plt.close()
+
+        if not sweep_df.empty:
+            plot_curve("inertia", "Elbow method (KMeans inertia)", "inertia", f"{pfx}_elbow.png")
+            plot_curve("silhouette", "Silhouette score (higher is better)", "silhouette", f"{pfx}_silhouette.png")
+            plot_curve("davies_bouldin", "Davies-Bouldin index (lower is better)", "davies_bouldin", f"{pfx}_davies_bouldin.png")
+            plot_curve("calinski_harabasz", "Calinski-Harabasz index (higher is better)", "calinski_harabasz", f"{pfx}_calinski.png")
+
+    except Exception as e:  # plots are optional: record the failure instead of aborting
+        Path("plot_warning.txt").write_text(f"Plotting failed: {e}\n")
+
+
+if __name__ == "__main__":  # run the CLI only when executed as a script
+    main()
diff --git a/modules/nf-core/cluster_metrics/tests/data/test_clusters.csv b/modules/nf-core/cluster_metrics/tests/data/test_clusters.csv
new file mode 100644
index 000000000000..1258849b8fbe
--- /dev/null
+++ b/modules/nf-core/cluster_metrics/tests/data/test_clusters.csv
@@ -0,0 +1,6 @@
+sample_id,cluster
+sample01,0
+sample02,2
+sample03,1
+sample04,2
+sample05,1
diff --git a/modules/nf-core/cluster_metrics/tests/data/test_features.tsv b/modules/nf-core/cluster_metrics/tests/data/test_features.tsv
new file mode 100644
index 000000000000..033d23b82df8
--- /dev/null
+++ b/modules/nf-core/cluster_metrics/tests/data/test_features.tsv
@@ -0,0 +1,6 @@
+sample_id	PC1	PC2	PC3
+sample01	0.1234	0.5678	0.9012
+sample02	-0.2345	0.6789	-0.0123
+sample03	0.3456	-0.7890	0.1234
+sample04	-0.4567	0.8901	-0.2345
+sample05	0.5678	-0.9012	0.3456
diff --git a/modules/nf-core/cluster_metrics/tests/main.nf.test b/modules/nf-core/cluster_metrics/tests/main.nf.test
new file mode 100644
index 000000000000..c66d8349ed93
--- /dev/null
+++ b/modules/nf-core/cluster_metrics/tests/main.nf.test
@@ -0,0 +1,44 @@
+nextflow_process {
+    name "Test Process CLUSTER_METRICS"
+    script "../main.nf"
+    process "CLUSTER_METRICS"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "cluster_metrics"
+
+    test("cluster_metrics - features and clusters") {  // real run on the bundled 5-sample fixtures
+        when {
+            process {
+                """
+                input[0] = [ [id:'test'], file("${projectDir}/modules/nf-core/cluster_metrics/tests/data/test_features.tsv", checkIfExists: true), file("${projectDir}/modules/nf-core/cluster_metrics/tests/data/test_clusters.csv", checkIfExists: true) ]
+                input[1] = 'test'
+                """
+            }
+        }
+        then {
+            assert process.success
+            assert snapshot(  // the plots channel (PNG files) is not part of this snapshot
+                process.out.metrics,
+                process.out.k_sweep,
+                process.out.selected,
+                process.out.versions
+            ).match()
+        }
+    }
+
+    test("cluster_metrics - features and clusters - stub") {
+        options "-stub"  // NOTE(review): CLUSTER_METRICS in this patch declares no stub: section - confirm what -stub exercises
+        when {
+            process {
+                """
+                input[0] = [ [id:'test'], file("${projectDir}/modules/nf-core/cluster_metrics/tests/data/test_features.tsv", checkIfExists: true), file("${projectDir}/modules/nf-core/cluster_metrics/tests/data/test_clusters.csv", checkIfExists: true) ]
+                input[1] = 'test'
+                """
+            }
+        }
+        then {
+            assert process.success
+            assert snapshot(process.out).match()  // snapshots every output channel, PNG checksums included
+        }
+    }
+}
diff --git a/modules/nf-core/cluster_metrics/tests/main.nf.test.snap b/modules/nf-core/cluster_metrics/tests/main.nf.test.snap
new file mode 100644
index 000000000000..30f9c8ea46a9
--- /dev/null
+++ b/modules/nf-core/cluster_metrics/tests/main.nf.test.snap
@@ -0,0 +1,129 @@
+{
+    "cluster_metrics - features and clusters - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_metrics.tsv:md5,7bfdac9bca90ba2ea3c03eca25b24f28"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_k_sweep.csv:md5,9709607199ba889999dadf4d251497be"
+                    ]
+                ],
+                "2": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_selected.json:md5,633493b1585fb0ec0a81629bde4c00cb"
+                    ]
+                ],
+                "3": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "test_calinski.png:md5,a5db5a4340f5a203f8cfb1e27617eab1",
+                            "test_davies_bouldin.png:md5,b083077c690cc454c7094d6d0c72fdb9",
+                            "test_elbow.png:md5,e90e14974deb0776c9b784c1de8b8ef0",
+                            "test_silhouette.png:md5,b7f103711912855f97d8588e1be2d659"
+                        ]
+                    ]
+                ],
+                "4": [
+                    "versions.yml:md5,00e23b98be698c459e9c94079b0164e0"
+                ],
+                "k_sweep": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_k_sweep.csv:md5,9709607199ba889999dadf4d251497be"
+                    ]
+                ],
+                "metrics": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_metrics.tsv:md5,7bfdac9bca90ba2ea3c03eca25b24f28"
+                    ]
+                ],
+                "plots": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        [
+                            "test_calinski.png:md5,a5db5a4340f5a203f8cfb1e27617eab1",
+                            "test_davies_bouldin.png:md5,b083077c690cc454c7094d6d0c72fdb9",
+                            "test_elbow.png:md5,e90e14974deb0776c9b784c1de8b8ef0",
+                            "test_silhouette.png:md5,b7f103711912855f97d8588e1be2d659"
+                        ]
+                    ]
+                ],
+                "selected": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_selected.json:md5,633493b1585fb0ec0a81629bde4c00cb"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,00e23b98be698c459e9c94079b0164e0"
+                ]
+            }
+        ],
+        "timestamp": "2026-04-29T14:41:23.201098606",
+        "meta": {
+            "nf-test": "0.9.5",
+            "nextflow": "25.09.0"
+        }
+    },
+    "cluster_metrics - features and clusters": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test_metrics.tsv:md5,7bfdac9bca90ba2ea3c03eca25b24f28"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test_k_sweep.csv:md5,b321710d5bc65ecdd9894da7e0de7d67"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test_selected.json:md5,633493b1585fb0ec0a81629bde4c00cb"
+                ]
+            ],
+            [
+                "versions.yml:md5,00e23b98be698c459e9c94079b0164e0"
+            ]
+        ],
+        "timestamp": "2026-04-29T14:41:18.57417712",
+        "meta": {
+            "nf-test": "0.9.5",
+            "nextflow": "25.09.0"
+        }
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/cluster_viz/environment.yml b/modules/nf-core/cluster_viz/environment.yml
new file mode 100644
index 000000000000..803fb67fb108
--- /dev/null
+++ b/modules/nf-core/cluster_viz/environment.yml
@@ -0,0 +1,11 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - conda-forge::matplotlib=3.9.*
+  - conda-forge::pandas=2.2.*
+  - conda-forge::python=3.12
+  - conda-forge::scikit-learn=1.5.*
+  - conda-forge::umap-learn=0.5.*
diff --git a/modules/nf-core/cluster_viz/main.nf b/modules/nf-core/cluster_viz/main.nf
new file mode 100644
index 000000000000..a68e4cc6a1ad
--- /dev/null
+++ b/modules/nf-core/cluster_viz/main.nf
@@ -0,0 +1,55 @@
+#!/usr/bin/env nextflow
+nextflow.enable.dsl = 2
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    PROCESS: CLUSTER_VIZ
+    Generates PCA, UMAP and t-SNE visualizations colored by cluster
+    Author: Donald Baku (author)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+process CLUSTER_VIZ {
+    tag "$meta.id"
+    label 'process_medium'
+    conda "${moduleDir}/environment.yml"
+
+    input:
+    tuple val(meta), path(features), path(clusters), path(pca_scores)
+    val out_prefix  // output file prefix, used unless task.ext.prefix is set
+
+    output:
+    tuple val(meta), path("*_umap.tsv")   , emit: umap
+    tuple val(meta), path("*_tsne.tsv")   , emit: tsne
+    tuple val(meta), path("*_umap.png")   , emit: umap_png
+    tuple val(meta), path("*_tsne.png")   , emit: tsne_png
+    tuple val(meta), path("*_pca.png")    , emit: pca_png
+    path "versions.yml"                   , emit: versions, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def prefix = task.ext.prefix ?: out_prefix ?: "${meta.id}"
+    // Resolve the bundled script via moduleDir so it is found wherever the module is installed
+    """
+    python3 ${moduleDir}/templates/cluster_viz.py \\
+        --features ${features} \\
+        --clusters ${clusters} \\
+        --pca-scores ${pca_scores} \\
+        --out-umap-tsv ${prefix}_umap.tsv \\
+        --out-tsne-tsv ${prefix}_tsne.tsv \\
+        --out-umap-png ${prefix}_umap.png \\
+        --out-tsne-png ${prefix}_tsne.png \\
+        --out-pca-png ${prefix}_pca.png
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python3 --version | cut -d' ' -f2)
+        pandas: \$(python3 -c "import pandas; print(pandas.__version__)")
+        scikit-learn: \$(python3 -c "import sklearn; print(sklearn.__version__)")
+        umap-learn: \$(python3 -c "import umap; print(umap.__version__)" 2>/dev/null || echo 'N/A')
+        matplotlib: \$(python3 -c "import matplotlib; print(matplotlib.__version__)")
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/cluster_viz/meta.yml b/modules/nf-core/cluster_viz/meta.yml
new file mode 100644
index 000000000000..eca231e91f85
--- /dev/null
+++ b/modules/nf-core/cluster_viz/meta.yml
@@ -0,0 +1,105 @@
+name: "cluster_viz"
+description: "Generates PCA, UMAP and t-SNE visualizations colored by cluster assignment"
+keywords:
+  - clustering
+  - visualization
+  - umap
+  - tsne
+  - pca
+
+tools:
+  - "scikit-learn":
+      description: "Machine learning library"
+      homepage: "https://scikit-learn.org/"
+      documentation: "https://scikit-learn.org/stable/"
+      licence: ["BSD-3-Clause"]
+  - "umap-learn":
+      description: "Uniform Manifold Approximation and Projection"
+      homepage: "https://umap-learn.readthedocs.io/"
+      documentation: "https://umap-learn.readthedocs.io/"
+      licence: ["BSD-3-Clause"]
+
+authors:
+  - "@dbaku42"
+maintainers:
+  - "@dbaku42"
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test' ]
+    - features:
+        type: file
+        description: TSV with features used for clustering
+        pattern: "*.tsv"
+    - clusters:
+        type: file
+        description: Cluster assignment file
+        pattern: "*_clusters.*"
+    - pca_scores:
+        type: file
+        description: Original PCA scores file
+        pattern: "*.eigenvec"
+
+  - out_prefix:
+      type: string
+      description: Prefix for output files
+
+output:
+  umap:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+      - "*_umap.tsv":
+          type: file
+          description: UMAP coordinates TSV
+          pattern: "*_umap.tsv"
+
+  tsne:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+      - "*_tsne.tsv":
+          type: file
+          description: t-SNE coordinates TSV
+          pattern: "*_tsne.tsv"
+
+  umap_png:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+      - "*_umap.png":
+          type: file
+          description: UMAP plot PNG
+          pattern: "*_umap.png"
+
+  tsne_png:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+      - "*_tsne.png":
+          type: file
+          description: t-SNE plot PNG
+          pattern: "*_tsne.png"
+
+  pca_png:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+      - "*_pca.png":
+          type: file
+          description: PCA plot colored by cluster
+          pattern: "*_pca.png"
+
+  versions:
+    - versions.yml:
+        type: file
+        description: File containing software versions
+        pattern: "versions.yml"
diff --git a/modules/nf-core/cluster_viz/templates/cluster_viz.py b/modules/nf-core/cluster_viz/templates/cluster_viz.py
new file mode 100644
index 000000000000..18ffbba4fff7
--- /dev/null
+++ b/modules/nf-core/cluster_viz/templates/cluster_viz.py
@@ -0,0 +1,213 @@
+#!/usr/bin/env python3
+
+"""Cluster visualizations.
+
+Produces three 2D plots, all colored by cluster label:
+  - PCA (first two columns from pca_scores)
+  - UMAP (computed on the feature matrix used for clustering)
+  - t-SNE (computed on the feature matrix used for clustering)
+
+Also writes UMAP and t-SNE coordinates to TSV.
+"""
+
+import argparse
+
+import numpy as np
+import pandas as pd
+from sklearn.manifold import TSNE
+
+
+def _normalise_id_column(df: pd.DataFrame) -> pd.DataFrame:
+    """Return ``df`` with its sample identifier column named ``sample_id``.
+
+    Accepts '#IID'/'IID' (PLINK2, FlashPCA), 'FID' + 'IID' and 'sample_id'
+    headers, case-insensitively, and drops a repeated header row. IID is the
+    identifier unless it is all-numeric and a non-numeric FID is present.
+    Columns are expected to hold strings (callers read with ``dtype=str``).
+
+    Raises:
+        ValueError: if neither a 'sample_id' nor an 'IID' column exists.
+    """
+    # Strip leading '#' (PLINK2 eigenvec writes '#IID' as the first column)
+    df = df.rename(columns=lambda c: c.lstrip("#"))
+    cols_upper = {c.upper(): c for c in df.columns}
+    iid_col, fid_col = cols_upper.get("IID"), cols_upper.get("FID")
+
+    # Remove duplicate header row (IID value == "FID" or "IID")
+    if iid_col is not None:
+        dup_mask = df[iid_col].str.upper().isin({"FID", "IID"})
+        if dup_mask.any():
+            df = df[~dup_mask].copy().reset_index(drop=True)
+
+    if "SAMPLE_ID" in cols_upper:
+        # Callers index the exact lowercase name, so normalise its case too.
+        return df.rename(columns={cols_upper["SAMPLE_ID"]: "sample_id"})
+
+    if iid_col is None:
+        raise ValueError(
+            f"Cannot find sample ID column (expected 'sample_id' or 'IID'). "
+            f"Found: {list(df.columns)}"
+        )
+
+    def _all_numeric(col: str) -> bool:
+        return bool(pd.to_numeric(df[col], errors="coerce").notna().all())
+
+    # Numeric IIDs are valid sample names; only prefer FID when it is the
+    # column that actually carries the (non-numeric) names.
+    if fid_col is not None and _all_numeric(iid_col) and not _all_numeric(fid_col):
+        df = df.drop(columns=[iid_col]).rename(columns={fid_col: "sample_id"})
+    else:
+        df = df.rename(columns={iid_col: "sample_id"})
+    # FID carries no per-sample information once the ID column is chosen.
+    return df.drop(columns=[c for c in df.columns if c.upper() == "FID"])
+
+
+def load_features(path: str) -> tuple[pd.DataFrame, pd.Series]:
+    """Read a whitespace-separated feature table as (numeric matrix, IDs).
+
+    Non-numeric or missing feature values are replaced by 0.0.
+    """
+    table = _normalise_id_column(
+        pd.read_csv(path, sep=r"\s+", engine="python", dtype=str)
+    )
+    features = table.drop(columns=["sample_id"]).apply(pd.to_numeric, errors="coerce")
+    return features.fillna(0.0), table["sample_id"].astype(str)
+
+
+def load_clusters(path: str) -> pd.Series:
+    """Read the clusters CSV into an int Series indexed by sample_id."""
+    table = _normalise_id_column(pd.read_csv(path, sep=",", dtype=str))
+    if "cluster" in table.columns:
+        return table.set_index("sample_id")["cluster"].astype(int)
+    raise ValueError("clusters CSV must have a 'cluster' column")
+
+
+def safe_perplexity(n_samples: int, requested: float) -> float:
+    """Cap perplexity at (n_samples - 1) / 3, floor at 2.0; 1.0 if n_samples <= 3."""
+    if n_samples > 3:
+        return float(max(2.0, min(requested, (n_samples - 1) / 3.0)))
+    return 1.0
+
+
+def compute_umap(X: np.ndarray, n_neighbors: int, min_dist: float) -> np.ndarray:
+    """Embed X in 2D with UMAP (seed 42).
+
+    Best-effort: on any error, warn and return the first two feature columns
+    (zero-padded to two columns when X has fewer).
+    """
+    try:
+        import umap
+        return umap.UMAP(
+            n_components=2, n_neighbors=n_neighbors, min_dist=min_dist, random_state=42
+        ).fit_transform(X)
+    except Exception as e:
+        print(f"[WARN] UMAP failed, fallback to first 2 feature columns: {e}")
+        if X.shape[1] >= 2:
+            return X[:, :2]
+        pad = np.zeros((X.shape[0], 2 - X.shape[1]))
+        return np.column_stack([X, pad])
+
+
+def compute_tsne(X: np.ndarray, perplexity: float, max_iter: int) -> np.ndarray:
+    """Embed X in 2D with t-SNE.
+
+    Uses PCA initialisation, a fixed seed (42) and the automatic learning
+    rate; ``perplexity`` and ``max_iter`` are forwarded to scikit-learn as-is.
+    """
+    settings = {"init": "pca", "learning_rate": "auto", "random_state": 42}
+    model = TSNE(n_components=2, perplexity=perplexity, max_iter=max_iter, **settings)
+    return model.fit_transform(X)
+
+
+def plot_scatter(df, x, y, out_png, title, xlabel=None, ylabel=None):
+    """Save a scatter plot of ``df[x]`` vs ``df[y]`` coloured by ``df["cluster"]``.
+
+    One legend entry per distinct cluster label, placed right of the axes;
+    axis labels default to the column names. The figure goes to ``out_png``.
+    """
+    import matplotlib.pyplot as plt
+    from matplotlib.lines import Line2D
+    labels = df["cluster"].astype(int).values
+    fig, ax = plt.subplots(figsize=(7, 5))
+    sc = ax.scatter(df[x], df[y], c=labels, cmap="Paired", s=24, linewidths=0.4, alpha=0.85)
+    ax.set(title=title, xlabel=xlabel or x, ylabel=ylabel or y)
+    ax.grid(True, alpha=0.5)
+    handles = [
+        Line2D([0], [0], marker="o", linestyle="", markersize=7, label=f"Cluster {k}",
+               markerfacecolor=sc.cmap(sc.norm(k)), markeredgecolor="none")
+        for k in np.unique(labels)
+    ]
+    ax.legend(handles=handles, title="Clusters", loc="center left",
+              bbox_to_anchor=(1.02, 0.5), borderaxespad=0.0, frameon=True)
+    fig.tight_layout()
+    fig.savefig(out_png, dpi=200, bbox_inches="tight")
+    plt.close(fig)
+
+
+def main() -> None:
+    """Parse CLI options, then write the UMAP/t-SNE tables and the three plots.
+
+    Raises:
+        ValueError: on missing sample overlap between the inputs, duplicated
+            cluster rows for a sample, or fewer than 2 PCA component columns.
+    """
+    ap = argparse.ArgumentParser(description="PCA + UMAP + t-SNE plots colored by cluster")
+    ap.add_argument("--features", required=True)
+    ap.add_argument("--clusters", required=True)
+    ap.add_argument("--pca-scores", required=True)
+    ap.add_argument("--tsne-perplexity", type=float, default=30.0)
+    ap.add_argument("--tsne-iter", type=int, default=1000)
+    ap.add_argument("--umap-neighbors", type=int, default=15)
+    ap.add_argument("--umap-min-dist", type=float, default=0.1)
+    ap.add_argument("--out-umap-tsv", required=True)
+    ap.add_argument("--out-tsne-tsv", required=True)
+    ap.add_argument("--out-umap-png", required=True)
+    ap.add_argument("--out-tsne-png", required=True)
+    ap.add_argument("--out-pca-png", required=True)
+    args = ap.parse_args()
+
+    X_df, sample_ids = load_features(args.features)
+    clusters = load_clusters(args.clusters)
+
+    common = sample_ids[sample_ids.isin(clusters.index)]
+    if len(common) == 0:
+        raise ValueError(
+            f"No overlapping sample_id between features and clusters.\n"
+            f"  features IDs (first 5): {sample_ids.head().tolist()}\n"
+            f"  clusters IDs (first 5): {list(clusters.index[:5])}"
+        )
+
+    X = X_df.loc[common.index].values
+    y = clusters.loc[common.values].values
+    if len(y) != len(common):
+        # A repeated sample_id in the clusters file yields extra label rows.
+        raise ValueError("clusters file contains duplicated sample_id entries")
+
+    def embedding_frame(xy: np.ndarray) -> pd.DataFrame:
+        cols = {"sample_id": common.values, "x": xy[:, 0], "y": xy[:, 1], "cluster": y}
+        return pd.DataFrame(cols)
+
+    umap_df = embedding_frame(compute_umap(X, args.umap_neighbors, args.umap_min_dist))
+    umap_df.to_csv(args.out_umap_tsv, sep="\t", index=False)
+    plot_scatter(umap_df, "x", "y", args.out_umap_png, "UMAP embedding")
+
+    perp = safe_perplexity(len(common), args.tsne_perplexity)
+    tsne_df = embedding_frame(compute_tsne(X, perp, args.tsne_iter))
+    tsne_df.to_csv(args.out_tsne_tsv, sep="\t", index=False)
+    plot_scatter(tsne_df, "x", "y", args.out_tsne_png, f"t-SNE (perplexity={perp:.1f})")
+
+    pca_df = pd.read_csv(args.pca_scores, sep=r"\s+", engine="python", dtype=str)
+    pca_df = _normalise_id_column(pca_df)
+    comp_cols = [c for c in pca_df.columns if c != "sample_id"]
+    if len(comp_cols) < 2:
+        raise ValueError("pca_scores must have at least 2 PC columns")
+    c1, c2 = comp_cols[:2]
+    pca_df[[c1, c2]] = pca_df[[c1, c2]].apply(pd.to_numeric, errors="coerce")
+    merged = pca_df.merge(umap_df[["sample_id", "cluster"]], on="sample_id", how="inner")
+    if merged.empty:
+        raise ValueError("No overlapping sample_id between pca_scores and clustered samples.")
+    plot_scatter(merged, c1, c2, args.out_pca_png, "PCA", c1, c2)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/modules/nf-core/cluster_viz/tests/data/test_clusters.csv b/modules/nf-core/cluster_viz/tests/data/test_clusters.csv
new file mode 100644
index 000000000000..1258849b8fbe
--- /dev/null
+++ b/modules/nf-core/cluster_viz/tests/data/test_clusters.csv
@@ -0,0 +1,6 @@
+sample_id,cluster
+sample01,0
+sample02,2
+sample03,1
+sample04,2
+sample05,1
diff --git a/modules/nf-core/cluster_viz/tests/data/test_features.tsv b/modules/nf-core/cluster_viz/tests/data/test_features.tsv
new file mode 100644
index 000000000000..033d23b82df8
--- /dev/null
+++ b/modules/nf-core/cluster_viz/tests/data/test_features.tsv
@@ -0,0 +1,6 @@
+sample_id	PC1	PC2	PC3
+sample01	0.1234	0.5678	0.9012
+sample02	-0.2345	0.6789	-0.0123
+sample03	0.3456	-0.7890	0.1234
+sample04	-0.4567	0.8901	-0.2345
+sample05	0.5678	-0.9012	0.3456
diff --git a/modules/nf-core/cluster_viz/tests/data/test_pca.eigenvec b/modules/nf-core/cluster_viz/tests/data/test_pca.eigenvec
new file mode 100644
index 000000000000..61aae5d8b413
--- /dev/null
+++ b/modules/nf-core/cluster_viz/tests/data/test_pca.eigenvec
@@ -0,0 +1,6 @@
+#FID IID PC1 PC2 PC3
+0 sample01 0.1234 0.5678 0.9012
+0 sample02 -0.2345 0.6789 -0.0123
+0 sample03 0.3456 -0.7890 0.1234
+0 sample04 -0.4567 0.8901 -0.2345
+0 sample05 0.5678 -0.9012 0.3456
diff --git a/modules/nf-core/cluster_viz/tests/main.nf.test b/modules/nf-core/cluster_viz/tests/main.nf.test
new file mode 100644
index 000000000000..341f15cf062b
--- /dev/null
+++ b/modules/nf-core/cluster_viz/tests/main.nf.test
@@ -0,0 +1,52 @@
+nextflow_process {
+    name "Test Process CLUSTER_VIZ"
+    script "../main.nf"
+    process "CLUSTER_VIZ"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "cluster_viz"
+
+    test("cluster_viz - features clusters pca") {
+        when {
+            process {
+                """
+                input[0] = [ [id:'test'], 
+                    file("${projectDir}/modules/nf-core/cluster_viz/tests/data/test_features.tsv", checkIfExists: true),
+                    file("${projectDir}/modules/nf-core/cluster_viz/tests/data/test_clusters.csv", checkIfExists: true),
+                    file("${projectDir}/modules/nf-core/cluster_viz/tests/data/test_pca.eigenvec", checkIfExists: true) ]
+                input[1] = 'test'
+                """
+            }
+        }
+        then {
+            assert process.success
+            assert snapshot(
+                process.out.umap,
+                process.out.tsne,
+                process.out.umap_png,
+                process.out.tsne_png,
+                process.out.pca_png,
+                process.out.versions
+            ).match()
+        }
+    }
+
+    test("cluster_viz - features clusters pca - stub") {
+        options "-stub"
+        when {
+            process {
+                """
+                input[0] = [ [id:'test'], 
+                    file("${projectDir}/modules/nf-core/cluster_viz/tests/data/test_features.tsv", checkIfExists: true),
+                    file("${projectDir}/modules/nf-core/cluster_viz/tests/data/test_clusters.csv", checkIfExists: true),
+                    file("${projectDir}/modules/nf-core/cluster_viz/tests/data/test_pca.eigenvec", checkIfExists: true) ]
+                input[1] = 'test'
+                """
+            }
+        }
+        then {
+            assert process.success
+            assert snapshot(process.out).match()
+        }
+    }
+}
diff --git a/modules/nf-core/cluster_viz/tests/main.nf.test.snap b/modules/nf-core/cluster_viz/tests/main.nf.test.snap
new file mode 100644
index 000000000000..4fe180aa20aa
--- /dev/null
+++ b/modules/nf-core/cluster_viz/tests/main.nf.test.snap
@@ -0,0 +1,151 @@
+{
+    "cluster_viz - features clusters pca": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test_umap.tsv:md5,feda69a2108d84b70c3937a1cb84e661"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test_tsne.tsv:md5,fc79b712722096f127cdb80c269380fb"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test_umap.png:md5,470fab262187541b0ad52a3138bf3734"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test_tsne.png:md5,aeacfdc6d9e35dd27406e70ae9220715"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test_pca.png:md5,a659f260b13e0351dd28cbb5163d6a20"
+                ]
+            ],
+            [
+                "versions.yml:md5,786af5c4301c54553001db08a8c9db5b"
+            ]
+        ],
+        "timestamp": "2026-04-29T14:56:42.32373645",
+        "meta": {
+            "nf-test": "0.9.5",
+            "nextflow": "25.09.0"
+        }
+    },
+    "cluster_viz - features clusters pca - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_umap.tsv:md5,feda69a2108d84b70c3937a1cb84e661"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_tsne.tsv:md5,fc79b712722096f127cdb80c269380fb"
+                    ]
+                ],
+                "2": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_umap.png:md5,470fab262187541b0ad52a3138bf3734"
+                    ]
+                ],
+                "3": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_tsne.png:md5,aeacfdc6d9e35dd27406e70ae9220715"
+                    ]
+                ],
+                "4": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_pca.png:md5,a659f260b13e0351dd28cbb5163d6a20"
+                    ]
+                ],
+                "5": [
+                    "versions.yml:md5,786af5c4301c54553001db08a8c9db5b"
+                ],
+                "pca_png": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_pca.png:md5,a659f260b13e0351dd28cbb5163d6a20"
+                    ]
+                ],
+                "tsne": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_tsne.tsv:md5,fc79b712722096f127cdb80c269380fb"
+                    ]
+                ],
+                "tsne_png": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_tsne.png:md5,aeacfdc6d9e35dd27406e70ae9220715"
+                    ]
+                ],
+                "umap": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_umap.tsv:md5,feda69a2108d84b70c3937a1cb84e661"
+                    ]
+                ],
+                "umap_png": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_umap.png:md5,470fab262187541b0ad52a3138bf3734"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,786af5c4301c54553001db08a8c9db5b"
+                ]
+            }
+        ],
+        "timestamp": "2026-04-29T14:56:54.120297782",
+        "meta": {
+            "nf-test": "0.9.5",
+            "nextflow": "25.09.0"
+        }
+    }
+}
\ No newline at end of file

From 0320a4163df54ed91e3ecc9e9a5846d89437b428 Mon Sep 17 00:00:00 2001
From: dbaku42 <dbaku42@gmail.com>
Date: Mon, 4 May 2026 14:44:50 +0200
Subject: [PATCH 02/38] Move custom clustering modules under custom

---
 modules/nf-core/cluster_metrics/meta.yml      |  62 ----------
 modules/nf-core/cluster_viz/meta.yml          | 105 ----------------
 .../cluster_metrics/environment.yml           |   0
 .../{ => custom}/cluster_metrics/main.nf      |   4 +-
 .../nf-core/custom/cluster_metrics/meta.yml   |  93 +++++++++++++++
 .../templates/cluster_metrics.py              |   0
 .../tests/data/test_clusters.csv              |   0
 .../tests/data/test_features.tsv              |   0
 .../cluster_metrics/tests/main.nf.test        |   4 +-
 .../cluster_metrics/tests/main.nf.test.snap   |  10 +-
 .../{ => custom}/cluster_viz/environment.yml  |   0
 .../nf-core/{ => custom}/cluster_viz/main.nf  |   4 +-
 modules/nf-core/custom/cluster_viz/meta.yml   | 112 ++++++++++++++++++
 .../cluster_viz/templates/cluster_viz.py      |   0
 .../cluster_viz/tests/data/test_clusters.csv  |   0
 .../cluster_viz/tests/data/test_features.tsv  |   0
 .../cluster_viz/tests/data/test_pca.eigenvec  |   0
 .../cluster_viz/tests/main.nf.test            |  12 +-
 .../cluster_viz/tests/main.nf.test.snap       |   0
 19 files changed, 222 insertions(+), 184 deletions(-)
 delete mode 100644 modules/nf-core/cluster_metrics/meta.yml
 delete mode 100644 modules/nf-core/cluster_viz/meta.yml
 rename modules/nf-core/{ => custom}/cluster_metrics/environment.yml (100%)
 rename modules/nf-core/{ => custom}/cluster_metrics/main.nf (92%)
 create mode 100644 modules/nf-core/custom/cluster_metrics/meta.yml
 rename modules/nf-core/{ => custom}/cluster_metrics/templates/cluster_metrics.py (100%)
 rename modules/nf-core/{ => custom}/cluster_metrics/tests/data/test_clusters.csv (100%)
 rename modules/nf-core/{ => custom}/cluster_metrics/tests/data/test_features.tsv (100%)
 rename modules/nf-core/{ => custom}/cluster_metrics/tests/main.nf.test (73%)
 rename modules/nf-core/{ => custom}/cluster_metrics/tests/main.nf.test.snap (91%)
 rename modules/nf-core/{ => custom}/cluster_viz/environment.yml (100%)
 rename modules/nf-core/{ => custom}/cluster_viz/main.nf (93%)
 create mode 100644 modules/nf-core/custom/cluster_viz/meta.yml
 rename modules/nf-core/{ => custom}/cluster_viz/templates/cluster_viz.py (100%)
 rename modules/nf-core/{ => custom}/cluster_viz/tests/data/test_clusters.csv (100%)
 rename modules/nf-core/{ => custom}/cluster_viz/tests/data/test_features.tsv (100%)
 rename modules/nf-core/{ => custom}/cluster_viz/tests/data/test_pca.eigenvec (100%)
 rename modules/nf-core/{ => custom}/cluster_viz/tests/main.nf.test (59%)
 rename modules/nf-core/{ => custom}/cluster_viz/tests/main.nf.test.snap (100%)

diff --git a/modules/nf-core/cluster_metrics/meta.yml b/modules/nf-core/cluster_metrics/meta.yml
deleted file mode 100644
index 0c55de377200..000000000000
--- a/modules/nf-core/cluster_metrics/meta.yml
+++ /dev/null
@@ -1,62 +0,0 @@
-name: "cluster_metrics"
-description: "Computes clustering quality metrics (silhouette, Calinski-Harabasz, Davies-Bouldin) and performs k-sweep analysis"
-keywords:
-  - clustering
-  - metrics
-  - silhouette
-  - calinski-harabasz
-  - davies-bouldin
-  - evaluation
-
-tools:
-  - "scikit-learn":
-      description: "Machine learning library for clustering metrics"
-      homepage: "https://scikit-learn.org/"
-      documentation: "https://scikit-learn.org/stable/modules/clustering.html"
-      licence: ["BSD-3-Clause"]
-
-authors:
-  - "@dbaku42"
-maintainers:
-  - "@dbaku42"
-
-input:
-  - meta:
-      type: map
-      description: Groovy Map containing sample information
-  - features:
-      type: file
-      description: TSV file with sample_id and numeric features (e.g. PCA scores)
-      pattern: "*.tsv"
-  - clusters:
-      type: file
-      description: CSV/TSV file with sample_id and cluster assignment
-      pattern: "*_clusters.*"
-  - out_prefix:
-      type: string
-      description: Prefix for output files
-
-output:
-  - meta:
-      type: map
-      description: Groovy Map containing sample information
-  - metrics:
-      type: file
-      description: TSV with selected cluster quality metrics
-      pattern: "*_metrics.tsv"
-  - k_sweep:
-      type: file
-      description: CSV with metrics for different values of k
-      pattern: "*_k_sweep.csv"
-  - selected:
-      type: file
-      description: JSON with the selected/best metrics
-      pattern: "*_selected.json"
-  - plots:
-      type: file
-      description: Optional PNG plots (elbow, silhouette, etc.)
-      pattern: "*.png"
-  - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
diff --git a/modules/nf-core/cluster_viz/meta.yml b/modules/nf-core/cluster_viz/meta.yml
deleted file mode 100644
index eca231e91f85..000000000000
--- a/modules/nf-core/cluster_viz/meta.yml
+++ /dev/null
@@ -1,105 +0,0 @@
-name: "cluster_viz"
-description: "Generates PCA, UMAP and t-SNE visualizations colored by cluster assignment"
-keywords:
-  - clustering
-  - visualization
-  - umap
-  - tsne
-  - pca
-
-tools:
-  - "scikit-learn":
-      description: "Machine learning library"
-      homepage: "https://scikit-learn.org/"
-      documentation: "https://scikit-learn.org/stable/"
-      licence: ["BSD-3-Clause"]
-  - "umap-learn":
-      description: "Uniform Manifold Approximation and Projection"
-      homepage: "https://umap-learn.readthedocs.io/"
-      documentation: "https://umap-learn.readthedocs.io/"
-      licence: ["BSD-3-Clause"]
-
-authors:
-  - "@dbaku42"
-maintainers:
-  - "@dbaku42"
-
-input:
-  - - meta:
-        type: map
-        description: |
-          Groovy Map containing sample information
-          e.g. [ id:'test' ]
-    - features:
-        type: file
-        description: TSV with features used for clustering
-        pattern: "*.tsv"
-    - clusters:
-        type: file
-        description: Cluster assignment file
-        pattern: "*_clusters.*"
-    - pca_scores:
-        type: file
-        description: Original PCA scores file
-        pattern: "*.eigenvec"
-
-  - out_prefix:
-      type: string
-      description: Prefix for output files
-
-output:
-  umap:
-    - - meta:
-          type: map
-          description: |
-            Groovy Map containing sample information
-      - "_umap.tsv":
-          type: file
-          description: UMAP coordinates TSV
-          pattern: "_umap.tsv"
-
-  tsne:
-    - - meta:
-          type: map
-          description: |
-            Groovy Map containing sample information
-      - "_tsne.tsv":
-          type: file
-          description: t-SNE coordinates TSV
-          pattern: "_tsne.tsv"
-
-  umap_png:
-    - - meta:
-          type: map
-          description: |
-            Groovy Map containing sample information
-      - "_umap.png":
-          type: file
-          description: UMAP plot PNG
-          pattern: "_umap.png"
-
-  tsne_png:
-    - - meta:
-          type: map
-          description: |
-            Groovy Map containing sample information
-      - "_tsne.png":
-          type: file
-          description: t-SNE plot PNG
-          pattern: "_tsne.png"
-
-  pca_png:
-    - - meta:
-          type: map
-          description: |
-            Groovy Map containing sample information
-      - "*_pca.png":
-          type: file
-          description: PCA plot colored by cluster
-          pattern: "*_pca.png"
-
-  versions:
-    - versions.yml:
-        type: file
-        description: File containing software versions
-        pattern: "versions.yml"
diff --git a/modules/nf-core/cluster_metrics/environment.yml b/modules/nf-core/custom/cluster_metrics/environment.yml
similarity index 100%
rename from modules/nf-core/cluster_metrics/environment.yml
rename to modules/nf-core/custom/cluster_metrics/environment.yml
diff --git a/modules/nf-core/cluster_metrics/main.nf b/modules/nf-core/custom/cluster_metrics/main.nf
similarity index 92%
rename from modules/nf-core/cluster_metrics/main.nf
rename to modules/nf-core/custom/cluster_metrics/main.nf
index 001f5479d85d..71b91e9c3c18 100644
--- a/modules/nf-core/cluster_metrics/main.nf
+++ b/modules/nf-core/custom/cluster_metrics/main.nf
@@ -5,7 +5,7 @@ nextflow.enable.dsl = 2
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     PROCESS: CLUSTER_METRICS
     Compute clustering quality metrics and k-sweep
-    Author: Donald Baku (athor)
+    Author: Donald Baku (author)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
 
@@ -32,7 +32,7 @@ process CLUSTER_METRICS {
     def prefix = task.ext.prefix ?: out_prefix ?: "${meta.id}"
 
     """
-    python3 ${projectDir}/modules/nf-core/cluster_metrics/templates/cluster_metrics.py \\
+    python3 ${projectDir}/modules/nf-core/custom/cluster_metrics/templates/cluster_metrics.py \\
         --features ${features} \\
         --clusters ${clusters} \\
         --out-k-sweep ${prefix}_k_sweep.csv \\
diff --git a/modules/nf-core/custom/cluster_metrics/meta.yml b/modules/nf-core/custom/cluster_metrics/meta.yml
new file mode 100644
index 000000000000..0716b49dfb6a
--- /dev/null
+++ b/modules/nf-core/custom/cluster_metrics/meta.yml
@@ -0,0 +1,93 @@
+name: "cluster_metrics"
+description: "Computes clustering quality metrics (silhouette, Calinski-Harabasz,
+  Davies-Bouldin) and performs k-sweep analysis"
+keywords:
+  - clustering
+  - metrics
+  - silhouette
+  - calinski-harabasz
+  - davies-bouldin
+  - evaluation
+tools:
+  - "scikit-learn":
+      description: "Machine learning library for clustering metrics"
+      homepage: "https://scikit-learn.org/"
+      documentation: "https://scikit-learn.org/stable/modules/clustering.html"
+      licence:
+        - "BSD-3-Clause"
+      identifier: ""
+input:
+  - - meta:
+        type: map
+        description: Groovy Map containing sample information
+    - features:
+        type: file
+        description: TSV file with sample_id and numeric features (e.g. PCA
+          scores)
+        pattern: "*.tsv"
+        ontologies:
+          - edam: http://edamontology.org/format_3475
+    - clusters:
+        type: file
+        description: CSV/TSV file with sample_id and cluster assignment
+        pattern: "*_clusters.*"
+        ontologies: []
+  - out_prefix:
+      type: string
+      description: Prefix for output files
+output:
+  metrics:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information
+      - "*_metrics.tsv":
+          type: file
+          description: TSV with selected cluster quality metrics
+          pattern: "*_metrics.tsv"
+          ontologies:
+            - edam: http://edamontology.org/format_3475
+  k_sweep:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information
+      - "*_k_sweep.csv":
+          type: file
+          description: CSV with metrics for different values of k
+          pattern: "*_k_sweep.csv"
+          ontologies:
+            - edam: http://edamontology.org/format_3752
+  selected:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information
+      - "*_selected.json":
+          type: file
+          description: JSON with the selected/best metrics
+          pattern: "*_selected.json"
+          ontologies:
+            - edam: http://edamontology.org/format_3464
+  plots:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information
+      - "*.png":
+          type: file
+          description: Optional PNG plots (elbow, silhouette, etc.)
+          pattern: "*.png"
+          ontologies: []
+  versions:
+    - "versions.yml":
+        type: file
+        description: File containing software versions
+        pattern: "versions.yml"
+        ontologies:
+          - edam: http://edamontology.org/format_3750
+topics:
+  versions:
+    - versions.yml:
+        type: string
+        description: The name of the process
+authors:
+  - "@dbaku42"
+maintainers:
+  - "@dbaku42"
diff --git a/modules/nf-core/cluster_metrics/templates/cluster_metrics.py b/modules/nf-core/custom/cluster_metrics/templates/cluster_metrics.py
similarity index 100%
rename from modules/nf-core/cluster_metrics/templates/cluster_metrics.py
rename to modules/nf-core/custom/cluster_metrics/templates/cluster_metrics.py
diff --git a/modules/nf-core/cluster_metrics/tests/data/test_clusters.csv b/modules/nf-core/custom/cluster_metrics/tests/data/test_clusters.csv
similarity index 100%
rename from modules/nf-core/cluster_metrics/tests/data/test_clusters.csv
rename to modules/nf-core/custom/cluster_metrics/tests/data/test_clusters.csv
diff --git a/modules/nf-core/cluster_metrics/tests/data/test_features.tsv b/modules/nf-core/custom/cluster_metrics/tests/data/test_features.tsv
similarity index 100%
rename from modules/nf-core/cluster_metrics/tests/data/test_features.tsv
rename to modules/nf-core/custom/cluster_metrics/tests/data/test_features.tsv
diff --git a/modules/nf-core/cluster_metrics/tests/main.nf.test b/modules/nf-core/custom/cluster_metrics/tests/main.nf.test
similarity index 73%
rename from modules/nf-core/cluster_metrics/tests/main.nf.test
rename to modules/nf-core/custom/cluster_metrics/tests/main.nf.test
index c66d8349ed93..e335db60dd74 100644
--- a/modules/nf-core/cluster_metrics/tests/main.nf.test
+++ b/modules/nf-core/custom/cluster_metrics/tests/main.nf.test
@@ -10,7 +10,7 @@ nextflow_process {
         when {
             process {
                 """
-                input[0] = [ [id:'test'], file("${projectDir}/modules/nf-core/cluster_metrics/tests/data/test_features.tsv", checkIfExists: true), file("${projectDir}/modules/nf-core/cluster_metrics/tests/data/test_clusters.csv", checkIfExists: true) ]
+                input[0] = [ [id:'test'], file("${projectDir}/modules/nf-core/custom/cluster_metrics/tests/data/test_features.tsv", checkIfExists: true), file("${projectDir}/modules/nf-core/custom/cluster_metrics/tests/data/test_clusters.csv", checkIfExists: true) ]
                 input[1] = 'test'
                 """
             }
@@ -31,7 +31,7 @@ nextflow_process {
         when {
             process {
                 """
-                input[0] = [ [id:'test'], file("${projectDir}/modules/nf-core/cluster_metrics/tests/data/test_features.tsv", checkIfExists: true), file("${projectDir}/modules/nf-core/cluster_metrics/tests/data/test_clusters.csv", checkIfExists: true) ]
+                input[0] = [ [id:'test'], file("${projectDir}/modules/nf-core/custom/cluster_metrics/tests/data/test_features.tsv", checkIfExists: true), file("${projectDir}/modules/nf-core/custom/cluster_metrics/tests/data/test_clusters.csv", checkIfExists: true) ]
                 input[1] = 'test'
                 """
             }
diff --git a/modules/nf-core/cluster_metrics/tests/main.nf.test.snap b/modules/nf-core/custom/cluster_metrics/tests/main.nf.test.snap
similarity index 91%
rename from modules/nf-core/cluster_metrics/tests/main.nf.test.snap
rename to modules/nf-core/custom/cluster_metrics/tests/main.nf.test.snap
index 30f9c8ea46a9..62189103b866 100644
--- a/modules/nf-core/cluster_metrics/tests/main.nf.test.snap
+++ b/modules/nf-core/custom/cluster_metrics/tests/main.nf.test.snap
@@ -15,7 +15,7 @@
                         {
                             "id": "test"
                         },
-                        "test_k_sweep.csv:md5,9709607199ba889999dadf4d251497be"
+                        "test_k_sweep.csv:md5,6be53a6eb8379a9ac718692a86702e3b"
                     ]
                 ],
                 "2": [
@@ -47,7 +47,7 @@
                         {
                             "id": "test"
                         },
-                        "test_k_sweep.csv:md5,9709607199ba889999dadf4d251497be"
+                        "test_k_sweep.csv:md5,6be53a6eb8379a9ac718692a86702e3b"
                     ]
                 ],
                 "metrics": [
@@ -84,7 +84,7 @@
                 ]
             }
         ],
-        "timestamp": "2026-04-29T14:41:23.201098606",
+        "timestamp": "2026-05-04T14:41:43.518699109",
         "meta": {
             "nf-test": "0.9.5",
             "nextflow": "25.09.0"
@@ -105,7 +105,7 @@
                     {
                         "id": "test"
                     },
-                    "test_k_sweep.csv:md5,b321710d5bc65ecdd9894da7e0de7d67"
+                    "test_k_sweep.csv:md5,9709607199ba889999dadf4d251497be"
                 ]
             ],
             [
@@ -120,7 +120,7 @@
                 "versions.yml:md5,00e23b98be698c459e9c94079b0164e0"
             ]
         ],
-        "timestamp": "2026-04-29T14:41:18.57417712",
+        "timestamp": "2026-05-04T14:41:38.770667954",
         "meta": {
             "nf-test": "0.9.5",
             "nextflow": "25.09.0"
diff --git a/modules/nf-core/cluster_viz/environment.yml b/modules/nf-core/custom/cluster_viz/environment.yml
similarity index 100%
rename from modules/nf-core/cluster_viz/environment.yml
rename to modules/nf-core/custom/cluster_viz/environment.yml
diff --git a/modules/nf-core/cluster_viz/main.nf b/modules/nf-core/custom/cluster_viz/main.nf
similarity index 93%
rename from modules/nf-core/cluster_viz/main.nf
rename to modules/nf-core/custom/cluster_viz/main.nf
index a68e4cc6a1ad..a5d6d9eb8e50 100644
--- a/modules/nf-core/cluster_viz/main.nf
+++ b/modules/nf-core/custom/cluster_viz/main.nf
@@ -5,7 +5,7 @@ nextflow.enable.dsl = 2
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     PROCESS: CLUSTER_VIZ
     Generates PCA, UMAP and t-SNE visualizations colored by cluster
-    Author: Donald Baku (athor)
+    Author: Donald Baku (author)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
 
@@ -33,7 +33,7 @@ process CLUSTER_VIZ {
     def prefix = task.ext.prefix ?: out_prefix ?: "${meta.id}"
 
     """
-    python3 ${projectDir}/modules/nf-core/cluster_viz/templates/cluster_viz.py \\
+    python3 ${projectDir}/modules/nf-core/custom/cluster_viz/templates/cluster_viz.py \\
         --features ${features} \\
         --clusters ${clusters} \\
         --pca-scores ${pca_scores} \\
diff --git a/modules/nf-core/custom/cluster_viz/meta.yml b/modules/nf-core/custom/cluster_viz/meta.yml
new file mode 100644
index 000000000000..cdad47cfcb26
--- /dev/null
+++ b/modules/nf-core/custom/cluster_viz/meta.yml
@@ -0,0 +1,112 @@
+name: "cluster_viz"
+description: "Generates PCA, UMAP and t-SNE visualizations colored by cluster"
+keywords:
+  - clustering
+  - visualization
+  - pca
+  - umap
+  - tsne
+  - dimension-reduction
+tools:
+  - "scikit-learn":
+      description: "Machine learning library for dimension reduction (PCA, t-SNE)"
+      homepage: "https://scikit-learn.org/"
+      documentation: "https://scikit-learn.org/stable/modules/manifold.html"
+      licence:
+        - "BSD-3-Clause"
+      identifier: ""
+  - "umap-learn":
+      description: "Uniform Manifold Approximation and Projection for dimension reduction"
+      homepage: "https://umap-learn.readthedocs.io/"
+      documentation: "https://umap-learn.readthedocs.io/en/latest/"
+      licence:
+        - "BSD-3-Clause"
+      identifier: ""
+input:
+  - - meta:
+        type: map
+        description: Groovy Map containing sample information
+    - features:
+        type: file
+        description: TSV file with sample_id and numeric features
+        pattern: "*.tsv"
+        ontologies:
+          - edam: http://edamontology.org/format_3475
+    - clusters:
+        type: file
+        description: CSV/TSV file with sample_id and cluster assignment
+        pattern: "*_clusters.*"
+        ontologies: []
+    - pca_scores:
+        type: file
+        description: Whitespace-delimited PCA scores table (e.g. TSV or PLINK .eigenvec) with a sample_id or IID column
+        pattern: "*.{tsv,eigenvec}"
+        ontologies:
+          - edam: http://edamontology.org/format_3475
+  - out_prefix:
+      type: string
+      description: Prefix for output files
+output:
+  umap:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information
+      - "*_umap.tsv":
+          type: file
+          description: UMAP coordinates TSV file (columns sample_id, x, y, cluster)
+          pattern: "*_umap.tsv"
+          ontologies:
+            - edam: http://edamontology.org/format_3475
+  tsne:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information
+      - "*_tsne.tsv":
+          type: file
+          description: t-SNE coordinates TSV file (columns sample_id, x, y, cluster)
+          pattern: "*_tsne.tsv"
+          ontologies:
+            - edam: http://edamontology.org/format_3475
+  umap_png:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information
+      - "*_umap.png":
+          type: file
+          description: UMAP visualization plot
+          pattern: "*_umap.png"
+          ontologies: []
+  tsne_png:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information
+      - "*_tsne.png":
+          type: file
+          description: t-SNE visualization plot
+          pattern: "*_tsne.png"
+          ontologies: []
+  pca_png:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information
+      - "*_pca.png":
+          type: file
+          description: PCA visualization plot
+          pattern: "*_pca.png"
+          ontologies: []
+  versions:
+    - "versions.yml":
+        type: file
+        description: File containing software versions
+        pattern: "versions.yml"
+        ontologies:
+          - edam: http://edamontology.org/format_3750
+topics:
+  versions:
+    - versions.yml:
+        type: file
+        description: File containing software versions
+authors:
+  - "@dbaku42"
+maintainers:
+  - "@dbaku42"
diff --git a/modules/nf-core/cluster_viz/templates/cluster_viz.py b/modules/nf-core/custom/cluster_viz/templates/cluster_viz.py
similarity index 100%
rename from modules/nf-core/cluster_viz/templates/cluster_viz.py
rename to modules/nf-core/custom/cluster_viz/templates/cluster_viz.py
diff --git a/modules/nf-core/cluster_viz/tests/data/test_clusters.csv b/modules/nf-core/custom/cluster_viz/tests/data/test_clusters.csv
similarity index 100%
rename from modules/nf-core/cluster_viz/tests/data/test_clusters.csv
rename to modules/nf-core/custom/cluster_viz/tests/data/test_clusters.csv
diff --git a/modules/nf-core/cluster_viz/tests/data/test_features.tsv b/modules/nf-core/custom/cluster_viz/tests/data/test_features.tsv
similarity index 100%
rename from modules/nf-core/cluster_viz/tests/data/test_features.tsv
rename to modules/nf-core/custom/cluster_viz/tests/data/test_features.tsv
diff --git a/modules/nf-core/cluster_viz/tests/data/test_pca.eigenvec b/modules/nf-core/custom/cluster_viz/tests/data/test_pca.eigenvec
similarity index 100%
rename from modules/nf-core/cluster_viz/tests/data/test_pca.eigenvec
rename to modules/nf-core/custom/cluster_viz/tests/data/test_pca.eigenvec
diff --git a/modules/nf-core/cluster_viz/tests/main.nf.test b/modules/nf-core/custom/cluster_viz/tests/main.nf.test
similarity index 59%
rename from modules/nf-core/cluster_viz/tests/main.nf.test
rename to modules/nf-core/custom/cluster_viz/tests/main.nf.test
index 341f15cf062b..3c873f6c1420 100644
--- a/modules/nf-core/cluster_viz/tests/main.nf.test
+++ b/modules/nf-core/custom/cluster_viz/tests/main.nf.test
@@ -11,9 +11,9 @@ nextflow_process {
             process {
                 """
                 input[0] = [ [id:'test'], 
-                    file("${projectDir}/modules/nf-core/cluster_viz/tests/data/test_features.tsv", checkIfExists: true),
-                    file("${projectDir}/modules/nf-core/cluster_viz/tests/data/test_clusters.csv", checkIfExists: true),
-                    file("${projectDir}/modules/nf-core/cluster_viz/tests/data/test_pca.eigenvec", checkIfExists: true) ]
+                    file("${projectDir}/modules/nf-core/custom/cluster_viz/tests/data/test_features.tsv", checkIfExists: true),
+                    file("${projectDir}/modules/nf-core/custom/cluster_viz/tests/data/test_clusters.csv", checkIfExists: true),
+                    file("${projectDir}/modules/nf-core/custom/cluster_viz/tests/data/test_pca.eigenvec", checkIfExists: true) ]
                 input[1] = 'test'
                 """
             }
@@ -37,9 +37,9 @@ nextflow_process {
             process {
                 """
                 input[0] = [ [id:'test'], 
-                    file("${projectDir}/modules/nf-core/cluster_viz/tests/data/test_features.tsv", checkIfExists: true),
-                    file("${projectDir}/modules/nf-core/cluster_viz/tests/data/test_clusters.csv", checkIfExists: true),
-                    file("${projectDir}/modules/nf-core/cluster_viz/tests/data/test_pca.eigenvec", checkIfExists: true) ]
+                    file("${projectDir}/modules/nf-core/custom/cluster_viz/tests/data/test_features.tsv", checkIfExists: true),
+                    file("${projectDir}/modules/nf-core/custom/cluster_viz/tests/data/test_clusters.csv", checkIfExists: true),
+                    file("${projectDir}/modules/nf-core/custom/cluster_viz/tests/data/test_pca.eigenvec", checkIfExists: true) ]
                 input[1] = 'test'
                 """
             }
diff --git a/modules/nf-core/cluster_viz/tests/main.nf.test.snap b/modules/nf-core/custom/cluster_viz/tests/main.nf.test.snap
similarity index 100%
rename from modules/nf-core/cluster_viz/tests/main.nf.test.snap
rename to modules/nf-core/custom/cluster_viz/tests/main.nf.test.snap

From e42433815c9bc1590454cd80d5e2335d9a9aa77e Mon Sep 17 00:00:00 2001
From: dbaku42 <dbaku42@gmail.com>
Date: Mon, 4 May 2026 15:35:55 +0200
Subject: [PATCH 03/38] Follow up custom move: add tags, stabilise k_sweep floats

---
 .../cluster_metrics/templates/cluster_metrics.py       |  4 ++--
 .../nf-core/custom/cluster_metrics/tests/main.nf.test  |  2 ++
 .../custom/cluster_metrics/tests/main.nf.test.snap     | 10 +++++-----
 modules/nf-core/custom/cluster_viz/tests/main.nf.test  |  2 ++
 4 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/modules/nf-core/custom/cluster_metrics/templates/cluster_metrics.py b/modules/nf-core/custom/cluster_metrics/templates/cluster_metrics.py
index 93da1f6a03d0..401172dca7d3 100644
--- a/modules/nf-core/custom/cluster_metrics/templates/cluster_metrics.py
+++ b/modules/nf-core/custom/cluster_metrics/templates/cluster_metrics.py
@@ -242,7 +242,7 @@ def main() -> None:
         })
 
     sweep_df = pd.DataFrame(rows)
-    sweep_df.to_csv(args.out_k_sweep, sep=",", index=False)
+    sweep_df.to_csv(args.out_k_sweep, sep=",", index=False, float_format="%.10g")
     Path(args.out_selected).write_text(json.dumps(selected, indent=2))
 
     pfx = args.out_prefix
@@ -273,4 +273,4 @@ def plot_curve(metric, title, ylabel, out_png):
 
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/modules/nf-core/custom/cluster_metrics/tests/main.nf.test b/modules/nf-core/custom/cluster_metrics/tests/main.nf.test
index e335db60dd74..d541633b82d1 100644
--- a/modules/nf-core/custom/cluster_metrics/tests/main.nf.test
+++ b/modules/nf-core/custom/cluster_metrics/tests/main.nf.test
@@ -4,6 +4,8 @@ nextflow_process {
     process "CLUSTER_METRICS"
     tag "modules"
     tag "modules_nfcore"
+    tag "custom"
+    tag "custom/cluster_metrics"
     tag "cluster_metrics"
 
     test("cluster_metrics - features and clusters") {
diff --git a/modules/nf-core/custom/cluster_metrics/tests/main.nf.test.snap b/modules/nf-core/custom/cluster_metrics/tests/main.nf.test.snap
index 62189103b866..d6dcb96d8697 100644
--- a/modules/nf-core/custom/cluster_metrics/tests/main.nf.test.snap
+++ b/modules/nf-core/custom/cluster_metrics/tests/main.nf.test.snap
@@ -15,7 +15,7 @@
                         {
                             "id": "test"
                         },
-                        "test_k_sweep.csv:md5,6be53a6eb8379a9ac718692a86702e3b"
+                        "test_k_sweep.csv:md5,8c382a00d959ae5d6e19f42cf9278f37"
                     ]
                 ],
                 "2": [
@@ -47,7 +47,7 @@
                         {
                             "id": "test"
                         },
-                        "test_k_sweep.csv:md5,6be53a6eb8379a9ac718692a86702e3b"
+                        "test_k_sweep.csv:md5,8c382a00d959ae5d6e19f42cf9278f37"
                     ]
                 ],
                 "metrics": [
@@ -84,7 +84,7 @@
                 ]
             }
         ],
-        "timestamp": "2026-05-04T14:41:43.518699109",
+        "timestamp": "2026-05-04T15:30:54.137415161",
         "meta": {
             "nf-test": "0.9.5",
             "nextflow": "25.09.0"
@@ -105,7 +105,7 @@
                     {
                         "id": "test"
                     },
-                    "test_k_sweep.csv:md5,9709607199ba889999dadf4d251497be"
+                    "test_k_sweep.csv:md5,8c382a00d959ae5d6e19f42cf9278f37"
                 ]
             ],
             [
@@ -120,7 +120,7 @@
                 "versions.yml:md5,00e23b98be698c459e9c94079b0164e0"
             ]
         ],
-        "timestamp": "2026-05-04T14:41:38.770667954",
+        "timestamp": "2026-05-04T15:30:49.380085424",
         "meta": {
             "nf-test": "0.9.5",
             "nextflow": "25.09.0"
diff --git a/modules/nf-core/custom/cluster_viz/tests/main.nf.test b/modules/nf-core/custom/cluster_viz/tests/main.nf.test
index 3c873f6c1420..a65f09b9d76f 100644
--- a/modules/nf-core/custom/cluster_viz/tests/main.nf.test
+++ b/modules/nf-core/custom/cluster_viz/tests/main.nf.test
@@ -4,6 +4,8 @@ nextflow_process {
     process "CLUSTER_VIZ"
     tag "modules"
     tag "modules_nfcore"
+    tag "custom"
+    tag "custom/cluster_viz"
     tag "cluster_viz"
 
     test("cluster_viz - features clusters pca") {

From 670bc334706cd23b6bcd38f060f2317a6d7b4a7c Mon Sep 17 00:00:00 2001
From: dbaku42 <dbaku42@gmail.com>
Date: Mon, 4 May 2026 15:47:14 +0200
Subject: [PATCH 04/38] Fix custom clustering module lint

---
 .../templates/cluster_metrics.py              | 99 ++++++++++---------
 .../cluster_viz/templates/cluster_viz.py      | 99 ++++++++++---------
 .../custom/cluster_viz/tests/main.nf.test     |  4 +-
 3 files changed, 110 insertions(+), 92 deletions(-)

diff --git a/modules/nf-core/custom/cluster_metrics/templates/cluster_metrics.py b/modules/nf-core/custom/cluster_metrics/templates/cluster_metrics.py
index 401172dca7d3..13a417ac37fe 100644
--- a/modules/nf-core/custom/cluster_metrics/templates/cluster_metrics.py
+++ b/modules/nf-core/custom/cluster_metrics/templates/cluster_metrics.py
@@ -4,16 +4,16 @@
 import json
 from pathlib import Path
 
+import matplotlib
 import numpy as np
 import pandas as pd
 from sklearn.cluster import KMeans
 from sklearn.metrics import (
-    silhouette_score,
     calinski_harabasz_score,
     davies_bouldin_score,
+    silhouette_score,
 )
 
-import matplotlib
 matplotlib.use("Agg")
 
 
@@ -55,10 +55,7 @@ def _normalise_id_column(df: pd.DataFrame) -> pd.DataFrame:
 
         return df
 
-    raise ValueError(
-        f"Cannot find sample ID column (expected 'sample_id' or 'IID'). "
-        f"Found: {list(df.columns)}"
-    )
+    raise ValueError(f"Cannot find sample ID column (expected 'sample_id' or 'IID'). Found: {list(df.columns)}")
 
 
 def load_features(path: str) -> tuple[pd.DataFrame, pd.Series]:
@@ -69,11 +66,11 @@ def load_features(path: str) -> tuple[pd.DataFrame, pd.Series]:
         raise ValueError("features file must contain a sample_id column after normalization")
 
     sample_ids = df["sample_id"].astype(str)
-    X = df.drop(columns=["sample_id"]).apply(pd.to_numeric, errors="coerce")
-    X = X.fillna(X.mean(numeric_only=True))
-    X = X.fillna(0.0)
+    x = df.drop(columns=["sample_id"]).apply(pd.to_numeric, errors="coerce")
+    x = x.fillna(x.mean(numeric_only=True))
+    x = x.fillna(0.0)
 
-    return X, sample_ids
+    return x, sample_ids
 
 
 def _looks_mostly_numeric(s: pd.Series) -> bool:
@@ -120,19 +117,19 @@ def load_clusters(path: str) -> tuple[pd.DataFrame, str]:
         candidate_vals = df[candidate].astype(str)
 
         if not _looks_mostly_numeric(candidate_vals):
-            out = pd.DataFrame({
-                "sample_id": candidate_vals,
-                "cluster": pd.to_numeric(df[cluster_col], errors="raise").astype(int),
-            })
+            out = pd.DataFrame(
+                {
+                    "sample_id": candidate_vals,
+                    "cluster": pd.to_numeric(df[cluster_col], errors="raise").astype(int),
+                }
+            )
             return out, "sample_id"
 
-    out = pd.DataFrame({
-        "cluster": pd.to_numeric(df[cluster_col], errors="raise").astype(int)
-    })
+    out = pd.DataFrame({"cluster": pd.to_numeric(df[cluster_col], errors="raise").astype(int)})
     return out, "row_order"
 
 
-def safe_cluster_metrics(X: np.ndarray, labels: np.ndarray) -> dict:
+def safe_cluster_metrics(x: np.ndarray, labels: np.ndarray) -> dict:
     uniq = np.unique(labels)
     n_clusters = len(uniq) - (1 if -1 in uniq else 0)
 
@@ -145,9 +142,9 @@ def safe_cluster_metrics(X: np.ndarray, labels: np.ndarray) -> dict:
         }
 
     mask = labels != -1
-    X_use, y_use = X[mask], labels[mask]
+    x_use, y_use = x[mask], labels[mask]
 
-    if len(X_use) < 2 or len(np.unique(y_use)) < 2:
+    if len(x_use) < 2 or len(np.unique(y_use)) < 2:
         return {
             "n_clusters": int(n_clusters),
             "silhouette": None,
@@ -157,9 +154,9 @@ def safe_cluster_metrics(X: np.ndarray, labels: np.ndarray) -> dict:
 
     return {
         "n_clusters": int(n_clusters),
-        "silhouette": float(silhouette_score(X_use, y_use)),
-        "calinski_harabasz": float(calinski_harabasz_score(X_use, y_use)),
-        "davies_bouldin": float(davies_bouldin_score(X_use, y_use)),
+        "silhouette": float(silhouette_score(x_use, y_use)),
+        "calinski_harabasz": float(calinski_harabasz_score(x_use, y_use)),
+        "davies_bouldin": float(davies_bouldin_score(x_use, y_use)),
     }
 
 
@@ -174,7 +171,7 @@ def main() -> None:
     ap.add_argument("--out-prefix", required=True)
     args = ap.parse_args()
 
-    X_df, sample_ids = load_features(args.features)
+    x_df, sample_ids = load_features(args.features)
     clusters_df, cluster_mode = load_clusters(args.clusters)
 
     if cluster_mode == "sample_id":
@@ -182,12 +179,12 @@ def main() -> None:
         common = sample_ids[sample_ids.isin(clusters.index)]
 
         if len(common) > 0:
-            X = X_df.loc[common.index].values
+            x = x_df.loc[common.index].values
             labels = clusters.loc[common.values].values
             aligned_ids = common.astype(str).tolist()
             alignment_mode = "sample_id"
         elif len(clusters_df) == len(sample_ids):
-            X = X_df.values
+            x = x_df.values
             labels = clusters_df["cluster"].values
             aligned_ids = sample_ids.astype(str).tolist()
             alignment_mode = "row_order_fallback"
@@ -204,15 +201,15 @@ def main() -> None:
                 f"  n_features={len(sample_ids)}\n"
                 f"  n_clusters={len(clusters_df)}"
             )
-        X = X_df.values
+        x = x_df.values
         labels = clusters_df["cluster"].values
         aligned_ids = sample_ids.astype(str).tolist()
         alignment_mode = "row_order"
 
-    if len(X) < 2:
+    if len(x) < 2:
         raise ValueError("Need at least 2 samples to compute cluster metrics")
 
-    selected = safe_cluster_metrics(X, labels)
+    selected = safe_cluster_metrics(x, labels)
     selected["input_clusters"] = Path(args.clusters).name
     selected["input_features"] = Path(args.features).name
     selected["n_samples_used"] = int(len(aligned_ids))
@@ -222,24 +219,26 @@ def main() -> None:
     pd.DataFrame([selected]).to_csv(metrics_tsv, sep="\t", index=False)
 
     rows = []
-    max_k = min(int(args.k_max), len(X))
+    max_k = min(int(args.k_max), len(x))
     for k in range(int(args.k_min), max_k + 1):
         model = KMeans(n_clusters=k, n_init="auto", random_state=42)
-        y = model.fit_predict(X)
+        y = model.fit_predict(x)
 
         sil = ch = db = None
-        if 1 < len(np.unique(y)) < len(X):
-            sil = float(silhouette_score(X, y))
-            ch = float(calinski_harabasz_score(X, y))
-            db = float(davies_bouldin_score(X, y))
-
-        rows.append({
-            "k": k,
-            "inertia": float(model.inertia_),
-            "silhouette": sil,
-            "calinski_harabasz": ch,
-            "davies_bouldin": db,
-        })
+        if 1 < len(np.unique(y)) < len(x):
+            sil = float(silhouette_score(x, y))
+            ch = float(calinski_harabasz_score(x, y))
+            db = float(davies_bouldin_score(x, y))
+
+        rows.append(
+            {
+                "k": k,
+                "inertia": float(model.inertia_),
+                "silhouette": sil,
+                "calinski_harabasz": ch,
+                "davies_bouldin": db,
+            }
+        )
 
     sweep_df = pd.DataFrame(rows)
     sweep_df.to_csv(args.out_k_sweep, sep=",", index=False, float_format="%.10g")
@@ -265,8 +264,18 @@ def plot_curve(metric, title, ylabel, out_png):
         if not sweep_df.empty:
             plot_curve("inertia", "Elbow method (KMeans inertia)", "inertia", f"{pfx}_elbow.png")
             plot_curve("silhouette", "Silhouette score (higher is better)", "silhouette", f"{pfx}_silhouette.png")
-            plot_curve("davies_bouldin", "Davies-Bouldin index (lower is better)", "davies_bouldin", f"{pfx}_davies_bouldin.png")
-            plot_curve("calinski_harabasz", "Calinski-Harabasz index (higher is better)", "calinski_harabasz", f"{pfx}_calinski.png")
+            plot_curve(
+                "davies_bouldin",
+                "Davies-Bouldin index (lower is better)",
+                "davies_bouldin",
+                f"{pfx}_davies_bouldin.png",
+            )
+            plot_curve(
+                "calinski_harabasz",
+                "Calinski-Harabasz index (higher is better)",
+                "calinski_harabasz",
+                f"{pfx}_calinski.png",
+            )
 
     except Exception as e:
         Path("plot_warning.txt").write_text(f"Plotting failed: {e}\n")
diff --git a/modules/nf-core/custom/cluster_viz/templates/cluster_viz.py b/modules/nf-core/custom/cluster_viz/templates/cluster_viz.py
index 18ffbba4fff7..020a65db986e 100644
--- a/modules/nf-core/custom/cluster_viz/templates/cluster_viz.py
+++ b/modules/nf-core/custom/cluster_viz/templates/cluster_viz.py
@@ -56,22 +56,15 @@ def _normalise_id_column(df: pd.DataFrame) -> pd.DataFrame:
             df = df.drop(columns=fid_cols)
         return df
 
-    raise ValueError(
-        f"Cannot find sample ID column (expected 'sample_id' or 'IID'). "
-        f"Found: {list(df.columns)}"
-    )
+    raise ValueError(f"Cannot find sample ID column (expected 'sample_id' or 'IID'). Found: {list(df.columns)}")
 
 
 def load_features(path: str) -> tuple[pd.DataFrame, pd.Series]:
     df = pd.read_csv(path, sep=r"\s+", engine="python", dtype=str)
     df = _normalise_id_column(df)
     sample_ids = df["sample_id"].astype(str)
-    X = (
-        df.drop(columns=["sample_id"])
-        .apply(pd.to_numeric, errors="coerce")
-        .fillna(0.0)
-    )
-    return X, sample_ids
+    x = df.drop(columns=["sample_id"]).apply(pd.to_numeric, errors="coerce").fillna(0.0)
+    return x, sample_ids
 
 
 def load_clusters(path: str) -> pd.Series:
@@ -89,26 +82,27 @@ def safe_perplexity(n_samples: int, requested: float) -> float:
     return float(max(2.0, min(requested, upper)))
 
 
-def compute_umap(X: np.ndarray, n_neighbors: int, min_dist: float) -> np.ndarray:
+def compute_umap(x: np.ndarray, n_neighbors: int, min_dist: float) -> np.ndarray:
     try:
         import umap
+
         return umap.UMAP(
             n_components=2,
             n_neighbors=n_neighbors,
             min_dist=min_dist,
             random_state=42,
-        ).fit_transform(X)
+        ).fit_transform(x)
     except Exception as e:
         print(f"[WARN] UMAP failed, fallback to first 2 feature columns: {e}")
-        if X.shape[1] >= 2:
-            return X[:, :2]
-        elif X.shape[1] == 1:
-            return np.column_stack([X[:, 0], np.zeros(X.shape[0])])
+        if x.shape[1] >= 2:
+            return x[:, :2]
+        elif x.shape[1] == 1:
+            return np.column_stack([x[:, 0], np.zeros(x.shape[0])])
         else:
-            return np.zeros((X.shape[0], 2))
+            return np.zeros((x.shape[0], 2))
 
 
-def compute_tsne(X: np.ndarray, perplexity: float, max_iter: int) -> np.ndarray:
+def compute_tsne(x: np.ndarray, perplexity: float, max_iter: int) -> np.ndarray:
     return TSNE(
         n_components=2,
         perplexity=perplexity,
@@ -116,7 +110,7 @@ def compute_tsne(X: np.ndarray, perplexity: float, max_iter: int) -> np.ndarray:
         random_state=42,
         max_iter=max_iter,
         learning_rate="auto",
-    ).fit_transform(X)
+    ).fit_transform(x)
 
 
 def plot_scatter(df, x, y, out_png, title, xlabel=None, ylabel=None):
@@ -126,19 +120,32 @@ def plot_scatter(df, x, y, out_png, title, xlabel=None, ylabel=None):
     plt.figure(figsize=(7, 5))
     labels = df["cluster"].astype(int).values
     uniq = np.unique(labels)
-    sc = plt.scatter(df[x], df[y], c=labels, cmap="Paired", s=24,linewidths=0.4, alpha=0.85)
+    sc = plt.scatter(df[x], df[y], c=labels, cmap="Paired", s=24, linewidths=0.4, alpha=0.85)
     plt.title(title)
     plt.xlabel(xlabel or x)
     plt.ylabel(ylabel or y)
     plt.grid(True, alpha=0.5)
     handles = [
-        Line2D([0], [0], marker="o", linestyle="", markersize=7,
-               markerfacecolor=sc.cmap(sc.norm(k)), markeredgecolor="none",
-               label=f"Cluster {k}")
+        Line2D(
+            [0],
+            [0],
+            marker="o",
+            linestyle="",
+            markersize=7,
+            markerfacecolor=sc.cmap(sc.norm(k)),
+            markeredgecolor="none",
+            label=f"Cluster {k}",
+        )
         for k in uniq
     ]
-    plt.legend(handles=handles, title="Clusters", loc="center left",
-               bbox_to_anchor=(1.02, 0.5), borderaxespad=0.0, frameon=True)
+    plt.legend(
+        handles=handles,
+        title="Clusters",
+        loc="center left",
+        bbox_to_anchor=(1.02, 0.5),
+        borderaxespad=0.0,
+        frameon=True,
+    )
     plt.tight_layout()
     plt.savefig(out_png, dpi=200, bbox_inches="tight")
     plt.close()
@@ -160,7 +167,7 @@ def main() -> None:
     ap.add_argument("--out-pca-png", required=True)
     args = ap.parse_args()
 
-    X_df, sample_ids = load_features(args.features)
+    x_df, sample_ids = load_features(args.features)
     clusters = load_clusters(args.clusters)
 
     common = sample_ids[sample_ids.isin(clusters.index)]
@@ -171,30 +178,33 @@ def main() -> None:
             f"  clusters IDs (first 5): {list(clusters.index[:5])}"
         )
 
-    X = X_df.loc[common.index].values
+    x = x_df.loc[common.index].values
     y = clusters.loc[common.values].values
 
-    umap_coords = compute_umap(X, args.umap_neighbors, args.umap_min_dist)
-    umap_df = pd.DataFrame({
-        "sample_id": common.values,
-        "x": umap_coords[:, 0],
-        "y": umap_coords[:, 1],
-        "cluster": y,
-    })
+    umap_coords = compute_umap(x, args.umap_neighbors, args.umap_min_dist)
+    umap_df = pd.DataFrame(
+        {
+            "sample_id": common.values,
+            "x": umap_coords[:, 0],
+            "y": umap_coords[:, 1],
+            "cluster": y,
+        }
+    )
     umap_df.to_csv(args.out_umap_tsv, sep="\t", index=False)
     plot_scatter(umap_df, "x", "y", args.out_umap_png, "UMAP embedding")
 
     perp = safe_perplexity(len(common), args.tsne_perplexity)
-    tsne_coords = compute_tsne(X, perp, args.tsne_iter)
-    tsne_df = pd.DataFrame({
-        "sample_id": common.values,
-        "x": tsne_coords[:, 0],
-        "y": tsne_coords[:, 1],
-        "cluster": y,
-    })
+    tsne_coords = compute_tsne(x, perp, args.tsne_iter)
+    tsne_df = pd.DataFrame(
+        {
+            "sample_id": common.values,
+            "x": tsne_coords[:, 0],
+            "y": tsne_coords[:, 1],
+            "cluster": y,
+        }
+    )
     tsne_df.to_csv(args.out_tsne_tsv, sep="\t", index=False)
-    plot_scatter(tsne_df, "x", "y", args.out_tsne_png,
-                 f"t-SNE (perplexity={perp:.1f})")
+    plot_scatter(tsne_df, "x", "y", args.out_tsne_png, f"t-SNE (perplexity={perp:.1f})")
 
     pca_df = pd.read_csv(args.pca_scores, sep=r"\s+", engine="python", dtype=str)
     pca_df = _normalise_id_column(pca_df)
@@ -205,8 +215,7 @@ def main() -> None:
     for col in [c1, c2]:
         pca_df[col] = pd.to_numeric(pca_df[col], errors="coerce")
     merged = pca_df.merge(umap_df[["sample_id", "cluster"]], on="sample_id", how="inner")
-    plot_scatter(merged, c1, c2, args.out_pca_png,
-                 "PCA", c1, c2)
+    plot_scatter(merged, c1, c2, args.out_pca_png, "PCA", c1, c2)
 
 
 if __name__ == "__main__":
diff --git a/modules/nf-core/custom/cluster_viz/tests/main.nf.test b/modules/nf-core/custom/cluster_viz/tests/main.nf.test
index a65f09b9d76f..4d2da62b7cd1 100644
--- a/modules/nf-core/custom/cluster_viz/tests/main.nf.test
+++ b/modules/nf-core/custom/cluster_viz/tests/main.nf.test
@@ -12,7 +12,7 @@ nextflow_process {
         when {
             process {
                 """
-                input[0] = [ [id:'test'], 
+                input[0] = [ [id:'test'],
                     file("${projectDir}/modules/nf-core/custom/cluster_viz/tests/data/test_features.tsv", checkIfExists: true),
                     file("${projectDir}/modules/nf-core/custom/cluster_viz/tests/data/test_clusters.csv", checkIfExists: true),
                     file("${projectDir}/modules/nf-core/custom/cluster_viz/tests/data/test_pca.eigenvec", checkIfExists: true) ]
@@ -38,7 +38,7 @@ nextflow_process {
         when {
             process {
                 """
-                input[0] = [ [id:'test'], 
+                input[0] = [ [id:'test'],
                     file("${projectDir}/modules/nf-core/custom/cluster_viz/tests/data/test_features.tsv", checkIfExists: true),
                     file("${projectDir}/modules/nf-core/custom/cluster_viz/tests/data/test_clusters.csv", checkIfExists: true),
                     file("${projectDir}/modules/nf-core/custom/cluster_viz/tests/data/test_pca.eigenvec", checkIfExists: true) ]

From 3a26119864710495502c76d6c5c10c5756d19ecd Mon Sep 17 00:00:00 2001
From: dbaku42 <dbaku42@gmail.com>
Date: Mon, 4 May 2026 16:33:30 +0200
Subject: [PATCH 05/38] Fix custom clustering module lint and snapshots

---
 .../cluster_metrics/tests/main.nf.test.snap   | 26 +++++++-------
 .../cluster_viz/tests/main.nf.test.snap       | 34 +++++++++----------
 2 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/modules/nf-core/custom/cluster_metrics/tests/main.nf.test.snap b/modules/nf-core/custom/cluster_metrics/tests/main.nf.test.snap
index d6dcb96d8697..095955fd70ce 100644
--- a/modules/nf-core/custom/cluster_metrics/tests/main.nf.test.snap
+++ b/modules/nf-core/custom/cluster_metrics/tests/main.nf.test.snap
@@ -32,15 +32,15 @@
                             "id": "test"
                         },
                         [
-                            "test_calinski.png:md5,a5db5a4340f5a203f8cfb1e27617eab1",
-                            "test_davies_bouldin.png:md5,b083077c690cc454c7094d6d0c72fdb9",
-                            "test_elbow.png:md5,e90e14974deb0776c9b784c1de8b8ef0",
-                            "test_silhouette.png:md5,b7f103711912855f97d8588e1be2d659"
+                            "test_calinski.png:md5,9ad4ecb0779b571ed6f869b5523d821c",
+                            "test_davies_bouldin.png:md5,87eeef85f2bdc97cd3faab3c984fa4cc",
+                            "test_elbow.png:md5,e1980b81ed477b8defe062f5e1dd9af3",
+                            "test_silhouette.png:md5,4fff8e7a883ed692329e4d73485e0e50"
                         ]
                     ]
                 ],
                 "4": [
-                    "versions.yml:md5,00e23b98be698c459e9c94079b0164e0"
+                    "versions.yml:md5,7f675f156526d327dab66b3edbcaf81d"
                 ],
                 "k_sweep": [
                     [
@@ -64,10 +64,10 @@
                             "id": "test"
                         },
                         [
-                            "test_calinski.png:md5,a5db5a4340f5a203f8cfb1e27617eab1",
-                            "test_davies_bouldin.png:md5,b083077c690cc454c7094d6d0c72fdb9",
-                            "test_elbow.png:md5,e90e14974deb0776c9b784c1de8b8ef0",
-                            "test_silhouette.png:md5,b7f103711912855f97d8588e1be2d659"
+                            "test_calinski.png:md5,9ad4ecb0779b571ed6f869b5523d821c",
+                            "test_davies_bouldin.png:md5,87eeef85f2bdc97cd3faab3c984fa4cc",
+                            "test_elbow.png:md5,e1980b81ed477b8defe062f5e1dd9af3",
+                            "test_silhouette.png:md5,4fff8e7a883ed692329e4d73485e0e50"
                         ]
                     ]
                 ],
@@ -80,11 +80,11 @@
                     ]
                 ],
                 "versions": [
-                    "versions.yml:md5,00e23b98be698c459e9c94079b0164e0"
+                    "versions.yml:md5,7f675f156526d327dab66b3edbcaf81d"
                 ]
             }
         ],
-        "timestamp": "2026-05-04T15:30:54.137415161",
+        "timestamp": "2026-05-04T16:19:18.154807795",
         "meta": {
             "nf-test": "0.9.5",
             "nextflow": "25.09.0"
@@ -117,10 +117,10 @@
                 ]
             ],
             [
-                "versions.yml:md5,00e23b98be698c459e9c94079b0164e0"
+                "versions.yml:md5,7f675f156526d327dab66b3edbcaf81d"
             ]
         ],
-        "timestamp": "2026-05-04T15:30:49.380085424",
+        "timestamp": "2026-05-04T16:18:56.462902926",
         "meta": {
             "nf-test": "0.9.5",
             "nextflow": "25.09.0"
diff --git a/modules/nf-core/custom/cluster_viz/tests/main.nf.test.snap b/modules/nf-core/custom/cluster_viz/tests/main.nf.test.snap
index 4fe180aa20aa..2b02e044f269 100644
--- a/modules/nf-core/custom/cluster_viz/tests/main.nf.test.snap
+++ b/modules/nf-core/custom/cluster_viz/tests/main.nf.test.snap
@@ -6,7 +6,7 @@
                     {
                         "id": "test"
                     },
-                    "test_umap.tsv:md5,feda69a2108d84b70c3937a1cb84e661"
+                    "test_umap.tsv:md5,81ca2c64bffcac064124703e144b3aa4"
                 ]
             ],
             [
@@ -22,7 +22,7 @@
                     {
                         "id": "test"
                     },
-                    "test_umap.png:md5,470fab262187541b0ad52a3138bf3734"
+                    "test_umap.png:md5,c9b22ffd5236909e4ad212b4f8724f47"
                 ]
             ],
             [
@@ -30,7 +30,7 @@
                     {
                         "id": "test"
                     },
-                    "test_tsne.png:md5,aeacfdc6d9e35dd27406e70ae9220715"
+                    "test_tsne.png:md5,16a8f8d45905af0f2312fc3b3cefe93f"
                 ]
             ],
             [
@@ -38,14 +38,14 @@
                     {
                         "id": "test"
                     },
-                    "test_pca.png:md5,a659f260b13e0351dd28cbb5163d6a20"
+                    "test_pca.png:md5,eed3e7e2b50aac1f2fd4659cde79300b"
                 ]
             ],
             [
-                "versions.yml:md5,786af5c4301c54553001db08a8c9db5b"
+                "versions.yml:md5,1e4e4ef1869740e8d7aac3aa574d157c"
             ]
         ],
-        "timestamp": "2026-04-29T14:56:42.32373645",
+        "timestamp": "2026-05-04T16:25:58.786254808",
         "meta": {
             "nf-test": "0.9.5",
             "nextflow": "25.09.0"
@@ -59,7 +59,7 @@
                         {
                             "id": "test"
                         },
-                        "test_umap.tsv:md5,feda69a2108d84b70c3937a1cb84e661"
+                        "test_umap.tsv:md5,81ca2c64bffcac064124703e144b3aa4"
                     ]
                 ],
                 "1": [
@@ -75,7 +75,7 @@
                         {
                             "id": "test"
                         },
-                        "test_umap.png:md5,470fab262187541b0ad52a3138bf3734"
+                        "test_umap.png:md5,c9b22ffd5236909e4ad212b4f8724f47"
                     ]
                 ],
                 "3": [
@@ -83,7 +83,7 @@
                         {
                             "id": "test"
                         },
-                        "test_tsne.png:md5,aeacfdc6d9e35dd27406e70ae9220715"
+                        "test_tsne.png:md5,16a8f8d45905af0f2312fc3b3cefe93f"
                     ]
                 ],
                 "4": [
@@ -91,18 +91,18 @@
                         {
                             "id": "test"
                         },
-                        "test_pca.png:md5,a659f260b13e0351dd28cbb5163d6a20"
+                        "test_pca.png:md5,eed3e7e2b50aac1f2fd4659cde79300b"
                     ]
                 ],
                 "5": [
-                    "versions.yml:md5,786af5c4301c54553001db08a8c9db5b"
+                    "versions.yml:md5,1e4e4ef1869740e8d7aac3aa574d157c"
                 ],
                 "pca_png": [
                     [
                         {
                             "id": "test"
                         },
-                        "test_pca.png:md5,a659f260b13e0351dd28cbb5163d6a20"
+                        "test_pca.png:md5,eed3e7e2b50aac1f2fd4659cde79300b"
                     ]
                 ],
                 "tsne": [
@@ -118,7 +118,7 @@
                         {
                             "id": "test"
                         },
-                        "test_tsne.png:md5,aeacfdc6d9e35dd27406e70ae9220715"
+                        "test_tsne.png:md5,16a8f8d45905af0f2312fc3b3cefe93f"
                     ]
                 ],
                 "umap": [
@@ -126,7 +126,7 @@
                         {
                             "id": "test"
                         },
-                        "test_umap.tsv:md5,feda69a2108d84b70c3937a1cb84e661"
+                        "test_umap.tsv:md5,81ca2c64bffcac064124703e144b3aa4"
                     ]
                 ],
                 "umap_png": [
@@ -134,15 +134,15 @@
                         {
                             "id": "test"
                         },
-                        "test_umap.png:md5,470fab262187541b0ad52a3138bf3734"
+                        "test_umap.png:md5,c9b22ffd5236909e4ad212b4f8724f47"
                     ]
                 ],
                 "versions": [
-                    "versions.yml:md5,786af5c4301c54553001db08a8c9db5b"
+                    "versions.yml:md5,1e4e4ef1869740e8d7aac3aa574d157c"
                 ]
             }
         ],
-        "timestamp": "2026-04-29T14:56:54.120297782",
+        "timestamp": "2026-05-04T16:26:22.932828948",
         "meta": {
             "nf-test": "0.9.5",
             "nextflow": "25.09.0"

From e4877e48bd416dcf183added8008511a582f3247 Mon Sep 17 00:00:00 2001
From: dbaku42 <dbaku42@gmail.com>
Date: Wed, 6 May 2026 17:31:48 +0200
Subject: [PATCH 06/38] Address review comments for clustering custom modules

---
 .../environment.yml                           |  0
 .../main.nf                                   |  2 +-
 .../meta.yml                                  |  2 +-
 .../templates/cluster_metrics.py              |  0
 .../tests/data/test_clusters.csv              |  0
 .../tests/data/test_features.tsv              |  0
 .../tests/main.nf.test                        | 12 +--
 .../tests/main.nf.test.snap                   | 96 +++++++++----------
 .../environment.yml                           |  0
 .../main.nf                                   |  2 +-
 .../meta.yml                                  |  2 +-
 .../templates/cluster_viz.py                  |  0
 .../tests/data/test_clusters.csv              |  0
 .../tests/data/test_features.tsv              |  0
 .../tests/data/test_pca.eigenvec              |  0
 .../tests/main.nf.test                        | 20 ++--
 .../tests/main.nf.test.snap                   | 38 ++++----
 17 files changed, 87 insertions(+), 87 deletions(-)
 rename modules/nf-core/custom/{cluster_metrics => clustermetrics}/environment.yml (100%)
 rename modules/nf-core/custom/{cluster_metrics => clustermetrics}/main.nf (94%)
 rename modules/nf-core/custom/{cluster_metrics => clustermetrics}/meta.yml (99%)
 rename modules/nf-core/custom/{cluster_metrics => clustermetrics}/templates/cluster_metrics.py (100%)
 rename modules/nf-core/custom/{cluster_metrics => clustermetrics}/tests/data/test_clusters.csv (100%)
 rename modules/nf-core/custom/{cluster_metrics => clustermetrics}/tests/data/test_features.tsv (100%)
 rename modules/nf-core/custom/{cluster_metrics => clustermetrics}/tests/main.nf.test (62%)
 rename modules/nf-core/custom/{cluster_metrics => clustermetrics}/tests/main.nf.test.snap (73%)
 rename modules/nf-core/custom/{cluster_viz => clustervisualiation}/environment.yml (100%)
 rename modules/nf-core/custom/{cluster_viz => clustervisualiation}/main.nf (95%)
 rename modules/nf-core/custom/{cluster_viz => clustervisualiation}/meta.yml (99%)
 rename modules/nf-core/custom/{cluster_viz => clustervisualiation}/templates/cluster_viz.py (100%)
 rename modules/nf-core/custom/{cluster_viz => clustervisualiation}/tests/data/test_clusters.csv (100%)
 rename modules/nf-core/custom/{cluster_viz => clustervisualiation}/tests/data/test_features.tsv (100%)
 rename modules/nf-core/custom/{cluster_viz => clustervisualiation}/tests/data/test_pca.eigenvec (100%)
 rename modules/nf-core/custom/{cluster_viz => clustervisualiation}/tests/main.nf.test (68%)
 rename modules/nf-core/custom/{cluster_viz => clustervisualiation}/tests/main.nf.test.snap (70%)

diff --git a/modules/nf-core/custom/cluster_metrics/environment.yml b/modules/nf-core/custom/clustermetrics/environment.yml
similarity index 100%
rename from modules/nf-core/custom/cluster_metrics/environment.yml
rename to modules/nf-core/custom/clustermetrics/environment.yml
diff --git a/modules/nf-core/custom/cluster_metrics/main.nf b/modules/nf-core/custom/clustermetrics/main.nf
similarity index 94%
rename from modules/nf-core/custom/cluster_metrics/main.nf
rename to modules/nf-core/custom/clustermetrics/main.nf
index 71b91e9c3c18..101f3cc78e84 100644
--- a/modules/nf-core/custom/cluster_metrics/main.nf
+++ b/modules/nf-core/custom/clustermetrics/main.nf
@@ -32,7 +32,7 @@ process CLUSTER_METRICS {
     def prefix = task.ext.prefix ?: out_prefix ?: "${meta.id}"
 
     """
-    python3 ${projectDir}/modules/nf-core/custom/cluster_metrics/templates/cluster_metrics.py \\
+    python3 ${moduleDir}/templates/cluster_metrics.py \\
         --features ${features} \\
         --clusters ${clusters} \\
         --out-k-sweep ${prefix}_k_sweep.csv \\
diff --git a/modules/nf-core/custom/cluster_metrics/meta.yml b/modules/nf-core/custom/clustermetrics/meta.yml
similarity index 99%
rename from modules/nf-core/custom/cluster_metrics/meta.yml
rename to modules/nf-core/custom/clustermetrics/meta.yml
index 0716b49dfb6a..2944931ae49c 100644
--- a/modules/nf-core/custom/cluster_metrics/meta.yml
+++ b/modules/nf-core/custom/clustermetrics/meta.yml
@@ -1,4 +1,4 @@
-name: "cluster_metrics"
+name: "clustermetrics"
 description: "Computes clustering quality metrics (silhouette, Calinski-Harabasz,
   Davies-Bouldin) and performs k-sweep analysis"
 keywords:
diff --git a/modules/nf-core/custom/cluster_metrics/templates/cluster_metrics.py b/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py
similarity index 100%
rename from modules/nf-core/custom/cluster_metrics/templates/cluster_metrics.py
rename to modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py
diff --git a/modules/nf-core/custom/cluster_metrics/tests/data/test_clusters.csv b/modules/nf-core/custom/clustermetrics/tests/data/test_clusters.csv
similarity index 100%
rename from modules/nf-core/custom/cluster_metrics/tests/data/test_clusters.csv
rename to modules/nf-core/custom/clustermetrics/tests/data/test_clusters.csv
diff --git a/modules/nf-core/custom/cluster_metrics/tests/data/test_features.tsv b/modules/nf-core/custom/clustermetrics/tests/data/test_features.tsv
similarity index 100%
rename from modules/nf-core/custom/cluster_metrics/tests/data/test_features.tsv
rename to modules/nf-core/custom/clustermetrics/tests/data/test_features.tsv
diff --git a/modules/nf-core/custom/cluster_metrics/tests/main.nf.test b/modules/nf-core/custom/clustermetrics/tests/main.nf.test
similarity index 62%
rename from modules/nf-core/custom/cluster_metrics/tests/main.nf.test
rename to modules/nf-core/custom/clustermetrics/tests/main.nf.test
index d541633b82d1..351d1c00df79 100644
--- a/modules/nf-core/custom/cluster_metrics/tests/main.nf.test
+++ b/modules/nf-core/custom/clustermetrics/tests/main.nf.test
@@ -5,14 +5,14 @@ nextflow_process {
     tag "modules"
     tag "modules_nfcore"
     tag "custom"
-    tag "custom/cluster_metrics"
-    tag "cluster_metrics"
+    tag "custom/clustermetrics"
+    tag "clustermetrics"
 
-    test("cluster_metrics - features and clusters") {
+    test("clustermetrics - features and clusters") {
         when {
             process {
                 """
-                input[0] = [ [id:'test'], file("${projectDir}/modules/nf-core/custom/cluster_metrics/tests/data/test_features.tsv", checkIfExists: true), file("${projectDir}/modules/nf-core/custom/cluster_metrics/tests/data/test_clusters.csv", checkIfExists: true) ]
+                input[0] = [ [id:'test'], file("${projectDir}/modules/nf-core/custom/clustermetrics/tests/data/test_features.tsv", checkIfExists: true), file("${projectDir}/modules/nf-core/custom/clustermetrics/tests/data/test_clusters.csv", checkIfExists: true) ]
                 input[1] = 'test'
                 """
             }
@@ -28,12 +28,12 @@ nextflow_process {
         }
     }
 
-    test("cluster_metrics - features and clusters - stub") {
+    test("clustermetrics - features and clusters - stub") {
         options "-stub"
         when {
             process {
                 """
-                input[0] = [ [id:'test'], file("${projectDir}/modules/nf-core/custom/cluster_metrics/tests/data/test_features.tsv", checkIfExists: true), file("${projectDir}/modules/nf-core/custom/cluster_metrics/tests/data/test_clusters.csv", checkIfExists: true) ]
+                input[0] = [ [id:'test'], file("${projectDir}/modules/nf-core/custom/clustermetrics/tests/data/test_features.tsv", checkIfExists: true), file("${projectDir}/modules/nf-core/custom/clustermetrics/tests/data/test_clusters.csv", checkIfExists: true) ]
                 input[1] = 'test'
                 """
             }
diff --git a/modules/nf-core/custom/cluster_metrics/tests/main.nf.test.snap b/modules/nf-core/custom/clustermetrics/tests/main.nf.test.snap
similarity index 73%
rename from modules/nf-core/custom/cluster_metrics/tests/main.nf.test.snap
rename to modules/nf-core/custom/clustermetrics/tests/main.nf.test.snap
index 095955fd70ce..1e28bf786221 100644
--- a/modules/nf-core/custom/cluster_metrics/tests/main.nf.test.snap
+++ b/modules/nf-core/custom/clustermetrics/tests/main.nf.test.snap
@@ -1,5 +1,41 @@
 {
-    "cluster_metrics - features and clusters - stub": {
+    "clustermetrics - features and clusters": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test_metrics.tsv:md5,7bfdac9bca90ba2ea3c03eca25b24f28"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test_k_sweep.csv:md5,8c382a00d959ae5d6e19f42cf9278f37"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test_selected.json:md5,633493b1585fb0ec0a81629bde4c00cb"
+                ]
+            ],
+            [
+                "versions.yml:md5,00e23b98be698c459e9c94079b0164e0"
+            ]
+        ],
+        "timestamp": "2026-05-06T17:29:16.510271878",
+        "meta": {
+            "nf-test": "0.9.5",
+            "nextflow": "25.09.0"
+        }
+    },
+    "clustermetrics - features and clusters - stub": {
         "content": [
             {
                 "0": [
@@ -32,15 +68,15 @@
                             "id": "test"
                         },
                         [
-                            "test_calinski.png:md5,9ad4ecb0779b571ed6f869b5523d821c",
-                            "test_davies_bouldin.png:md5,87eeef85f2bdc97cd3faab3c984fa4cc",
-                            "test_elbow.png:md5,e1980b81ed477b8defe062f5e1dd9af3",
-                            "test_silhouette.png:md5,4fff8e7a883ed692329e4d73485e0e50"
+                            "test_calinski.png:md5,a5db5a4340f5a203f8cfb1e27617eab1",
+                            "test_davies_bouldin.png:md5,b083077c690cc454c7094d6d0c72fdb9",
+                            "test_elbow.png:md5,e90e14974deb0776c9b784c1de8b8ef0",
+                            "test_silhouette.png:md5,b7f103711912855f97d8588e1be2d659"
                         ]
                     ]
                 ],
                 "4": [
-                    "versions.yml:md5,7f675f156526d327dab66b3edbcaf81d"
+                    "versions.yml:md5,00e23b98be698c459e9c94079b0164e0"
                 ],
                 "k_sweep": [
                     [
@@ -64,10 +100,10 @@
                             "id": "test"
                         },
                         [
-                            "test_calinski.png:md5,9ad4ecb0779b571ed6f869b5523d821c",
-                            "test_davies_bouldin.png:md5,87eeef85f2bdc97cd3faab3c984fa4cc",
-                            "test_elbow.png:md5,e1980b81ed477b8defe062f5e1dd9af3",
-                            "test_silhouette.png:md5,4fff8e7a883ed692329e4d73485e0e50"
+                            "test_calinski.png:md5,a5db5a4340f5a203f8cfb1e27617eab1",
+                            "test_davies_bouldin.png:md5,b083077c690cc454c7094d6d0c72fdb9",
+                            "test_elbow.png:md5,e90e14974deb0776c9b784c1de8b8ef0",
+                            "test_silhouette.png:md5,b7f103711912855f97d8588e1be2d659"
                         ]
                     ]
                 ],
@@ -80,47 +116,11 @@
                     ]
                 ],
                 "versions": [
-                    "versions.yml:md5,7f675f156526d327dab66b3edbcaf81d"
+                    "versions.yml:md5,00e23b98be698c459e9c94079b0164e0"
                 ]
             }
         ],
-        "timestamp": "2026-05-04T16:19:18.154807795",
-        "meta": {
-            "nf-test": "0.9.5",
-            "nextflow": "25.09.0"
-        }
-    },
-    "cluster_metrics - features and clusters": {
-        "content": [
-            [
-                [
-                    {
-                        "id": "test"
-                    },
-                    "test_metrics.tsv:md5,7bfdac9bca90ba2ea3c03eca25b24f28"
-                ]
-            ],
-            [
-                [
-                    {
-                        "id": "test"
-                    },
-                    "test_k_sweep.csv:md5,8c382a00d959ae5d6e19f42cf9278f37"
-                ]
-            ],
-            [
-                [
-                    {
-                        "id": "test"
-                    },
-                    "test_selected.json:md5,633493b1585fb0ec0a81629bde4c00cb"
-                ]
-            ],
-            [
-                "versions.yml:md5,7f675f156526d327dab66b3edbcaf81d"
-            ]
-        ],
-        "timestamp": "2026-05-04T16:18:56.462902926",
+        "timestamp": "2026-05-06T17:29:21.180634202",
         "meta": {
             "nf-test": "0.9.5",
             "nextflow": "25.09.0"
diff --git a/modules/nf-core/custom/cluster_viz/environment.yml b/modules/nf-core/custom/clustervisualiation/environment.yml
similarity index 100%
rename from modules/nf-core/custom/cluster_viz/environment.yml
rename to modules/nf-core/custom/clustervisualiation/environment.yml
diff --git a/modules/nf-core/custom/cluster_viz/main.nf b/modules/nf-core/custom/clustervisualiation/main.nf
similarity index 95%
rename from modules/nf-core/custom/cluster_viz/main.nf
rename to modules/nf-core/custom/clustervisualiation/main.nf
index a5d6d9eb8e50..58cb89db8142 100644
--- a/modules/nf-core/custom/cluster_viz/main.nf
+++ b/modules/nf-core/custom/clustervisualiation/main.nf
@@ -33,7 +33,7 @@ process CLUSTER_VIZ {
     def prefix = task.ext.prefix ?: out_prefix ?: "${meta.id}"
 
     """
-    python3 ${projectDir}/modules/nf-core/custom/cluster_viz/templates/cluster_viz.py \\
+    python3 ${moduleDir}/templates/cluster_viz.py \\
         --features ${features} \\
         --clusters ${clusters} \\
         --pca-scores ${pca_scores} \\
diff --git a/modules/nf-core/custom/cluster_viz/meta.yml b/modules/nf-core/custom/clustervisualiation/meta.yml
similarity index 99%
rename from modules/nf-core/custom/cluster_viz/meta.yml
rename to modules/nf-core/custom/clustervisualiation/meta.yml
index cdad47cfcb26..f08ba6bfbc35 100644
--- a/modules/nf-core/custom/cluster_viz/meta.yml
+++ b/modules/nf-core/custom/clustervisualiation/meta.yml
@@ -1,4 +1,4 @@
-name: "cluster_viz"
+name: "clustervisualiation"
 description: "Generates PCA, UMAP and t-SNE visualizations colored by cluster"
 keywords:
   - clustering
diff --git a/modules/nf-core/custom/cluster_viz/templates/cluster_viz.py b/modules/nf-core/custom/clustervisualiation/templates/cluster_viz.py
similarity index 100%
rename from modules/nf-core/custom/cluster_viz/templates/cluster_viz.py
rename to modules/nf-core/custom/clustervisualiation/templates/cluster_viz.py
diff --git a/modules/nf-core/custom/cluster_viz/tests/data/test_clusters.csv b/modules/nf-core/custom/clustervisualiation/tests/data/test_clusters.csv
similarity index 100%
rename from modules/nf-core/custom/cluster_viz/tests/data/test_clusters.csv
rename to modules/nf-core/custom/clustervisualiation/tests/data/test_clusters.csv
diff --git a/modules/nf-core/custom/cluster_viz/tests/data/test_features.tsv b/modules/nf-core/custom/clustervisualiation/tests/data/test_features.tsv
similarity index 100%
rename from modules/nf-core/custom/cluster_viz/tests/data/test_features.tsv
rename to modules/nf-core/custom/clustervisualiation/tests/data/test_features.tsv
diff --git a/modules/nf-core/custom/cluster_viz/tests/data/test_pca.eigenvec b/modules/nf-core/custom/clustervisualiation/tests/data/test_pca.eigenvec
similarity index 100%
rename from modules/nf-core/custom/cluster_viz/tests/data/test_pca.eigenvec
rename to modules/nf-core/custom/clustervisualiation/tests/data/test_pca.eigenvec
diff --git a/modules/nf-core/custom/cluster_viz/tests/main.nf.test b/modules/nf-core/custom/clustervisualiation/tests/main.nf.test
similarity index 68%
rename from modules/nf-core/custom/cluster_viz/tests/main.nf.test
rename to modules/nf-core/custom/clustervisualiation/tests/main.nf.test
index 4d2da62b7cd1..f4695eedae1e 100644
--- a/modules/nf-core/custom/cluster_viz/tests/main.nf.test
+++ b/modules/nf-core/custom/clustervisualiation/tests/main.nf.test
@@ -5,17 +5,17 @@ nextflow_process {
     tag "modules"
     tag "modules_nfcore"
     tag "custom"
-    tag "custom/cluster_viz"
-    tag "cluster_viz"
+    tag "custom/clustervisualiation"
+    tag "clustervisualiation"
 
-    test("cluster_viz - features clusters pca") {
+    test("clustervisualiation - features clusters pca") {
         when {
             process {
                 """
                 input[0] = [ [id:'test'],
-                    file("${projectDir}/modules/nf-core/custom/cluster_viz/tests/data/test_features.tsv", checkIfExists: true),
-                    file("${projectDir}/modules/nf-core/custom/cluster_viz/tests/data/test_clusters.csv", checkIfExists: true),
-                    file("${projectDir}/modules/nf-core/custom/cluster_viz/tests/data/test_pca.eigenvec", checkIfExists: true) ]
+                    file("${projectDir}/modules/nf-core/custom/clustervisualiation/tests/data/test_features.tsv", checkIfExists: true),
+                    file("${projectDir}/modules/nf-core/custom/clustervisualiation/tests/data/test_clusters.csv", checkIfExists: true),
+                    file("${projectDir}/modules/nf-core/custom/clustervisualiation/tests/data/test_pca.eigenvec", checkIfExists: true) ]
                 input[1] = 'test'
                 """
             }
@@ -33,15 +33,15 @@ nextflow_process {
         }
     }
 
-    test("cluster_viz - features clusters pca - stub") {
+    test("clustervisualiation - features clusters pca - stub") {
         options "-stub"
         when {
             process {
                 """
                 input[0] = [ [id:'test'],
-                    file("${projectDir}/modules/nf-core/custom/cluster_viz/tests/data/test_features.tsv", checkIfExists: true),
-                    file("${projectDir}/modules/nf-core/custom/cluster_viz/tests/data/test_clusters.csv", checkIfExists: true),
-                    file("${projectDir}/modules/nf-core/custom/cluster_viz/tests/data/test_pca.eigenvec", checkIfExists: true) ]
+                    file("${projectDir}/modules/nf-core/custom/clustervisualiation/tests/data/test_features.tsv", checkIfExists: true),
+                    file("${projectDir}/modules/nf-core/custom/clustervisualiation/tests/data/test_clusters.csv", checkIfExists: true),
+                    file("${projectDir}/modules/nf-core/custom/clustervisualiation/tests/data/test_pca.eigenvec", checkIfExists: true) ]
                 input[1] = 'test'
                 """
             }
diff --git a/modules/nf-core/custom/cluster_viz/tests/main.nf.test.snap b/modules/nf-core/custom/clustervisualiation/tests/main.nf.test.snap
similarity index 70%
rename from modules/nf-core/custom/cluster_viz/tests/main.nf.test.snap
rename to modules/nf-core/custom/clustervisualiation/tests/main.nf.test.snap
index 2b02e044f269..bc8ca92380e9 100644
--- a/modules/nf-core/custom/cluster_viz/tests/main.nf.test.snap
+++ b/modules/nf-core/custom/clustervisualiation/tests/main.nf.test.snap
@@ -1,12 +1,12 @@
 {
-    "cluster_viz - features clusters pca": {
+    "clustervisualiation - features clusters pca": {
         "content": [
             [
                 [
                     {
                         "id": "test"
                     },
-                    "test_umap.tsv:md5,81ca2c64bffcac064124703e144b3aa4"
+                    "test_umap.tsv:md5,feda69a2108d84b70c3937a1cb84e661"
                 ]
             ],
             [
@@ -22,7 +22,7 @@
                     {
                         "id": "test"
                     },
-                    "test_umap.png:md5,c9b22ffd5236909e4ad212b4f8724f47"
+                    "test_umap.png:md5,470fab262187541b0ad52a3138bf3734"
                 ]
             ],
             [
@@ -30,7 +30,7 @@
                     {
                         "id": "test"
                     },
-                    "test_tsne.png:md5,16a8f8d45905af0f2312fc3b3cefe93f"
+                    "test_tsne.png:md5,aeacfdc6d9e35dd27406e70ae9220715"
                 ]
             ],
             [
@@ -38,20 +38,20 @@
                     {
                         "id": "test"
                     },
-                    "test_pca.png:md5,eed3e7e2b50aac1f2fd4659cde79300b"
+                    "test_pca.png:md5,a659f260b13e0351dd28cbb5163d6a20"
                 ]
             ],
             [
-                "versions.yml:md5,1e4e4ef1869740e8d7aac3aa574d157c"
+                "versions.yml:md5,786af5c4301c54553001db08a8c9db5b"
             ]
         ],
-        "timestamp": "2026-05-04T16:25:58.786254808",
+        "timestamp": "2026-05-06T17:29:34.854148226",
         "meta": {
             "nf-test": "0.9.5",
             "nextflow": "25.09.0"
         }
     },
-    "cluster_viz - features clusters pca - stub": {
+    "clustervisualiation - features clusters pca - stub": {
         "content": [
             {
                 "0": [
@@ -59,7 +59,7 @@
                         {
                             "id": "test"
                         },
-                        "test_umap.tsv:md5,81ca2c64bffcac064124703e144b3aa4"
+                        "test_umap.tsv:md5,feda69a2108d84b70c3937a1cb84e661"
                     ]
                 ],
                 "1": [
@@ -75,7 +75,7 @@
                         {
                             "id": "test"
                         },
-                        "test_umap.png:md5,c9b22ffd5236909e4ad212b4f8724f47"
+                        "test_umap.png:md5,470fab262187541b0ad52a3138bf3734"
                     ]
                 ],
                 "3": [
@@ -83,7 +83,7 @@
                         {
                             "id": "test"
                         },
-                        "test_tsne.png:md5,16a8f8d45905af0f2312fc3b3cefe93f"
+                        "test_tsne.png:md5,aeacfdc6d9e35dd27406e70ae9220715"
                     ]
                 ],
                 "4": [
@@ -91,18 +91,18 @@
                         {
                             "id": "test"
                         },
-                        "test_pca.png:md5,eed3e7e2b50aac1f2fd4659cde79300b"
+                        "test_pca.png:md5,a659f260b13e0351dd28cbb5163d6a20"
                     ]
                 ],
                 "5": [
-                    "versions.yml:md5,1e4e4ef1869740e8d7aac3aa574d157c"
+                    "versions.yml:md5,786af5c4301c54553001db08a8c9db5b"
                 ],
                 "pca_png": [
                     [
                         {
                             "id": "test"
                         },
-                        "test_pca.png:md5,eed3e7e2b50aac1f2fd4659cde79300b"
+                        "test_pca.png:md5,a659f260b13e0351dd28cbb5163d6a20"
                     ]
                 ],
                 "tsne": [
@@ -118,7 +118,7 @@
                         {
                             "id": "test"
                         },
-                        "test_tsne.png:md5,16a8f8d45905af0f2312fc3b3cefe93f"
+                        "test_tsne.png:md5,aeacfdc6d9e35dd27406e70ae9220715"
                     ]
                 ],
                 "umap": [
@@ -126,7 +126,7 @@
                         {
                             "id": "test"
                         },
-                        "test_umap.tsv:md5,81ca2c64bffcac064124703e144b3aa4"
+                        "test_umap.tsv:md5,feda69a2108d84b70c3937a1cb84e661"
                     ]
                 ],
                 "umap_png": [
@@ -134,15 +134,15 @@
                         {
                             "id": "test"
                         },
-                        "test_umap.png:md5,c9b22ffd5236909e4ad212b4f8724f47"
+                        "test_umap.png:md5,470fab262187541b0ad52a3138bf3734"
                     ]
                 ],
                 "versions": [
-                    "versions.yml:md5,1e4e4ef1869740e8d7aac3aa574d157c"
+                    "versions.yml:md5,786af5c4301c54553001db08a8c9db5b"
                 ]
             }
         ],
-        "timestamp": "2026-05-04T16:26:22.932828948",
+        "timestamp": "2026-05-06T17:29:46.705755643",
         "meta": {
             "nf-test": "0.9.5",
             "nextflow": "25.09.0"

From 7e19b220294a8ff1e3074fca0b47b4db310630ef Mon Sep 17 00:00:00 2001
From: dbaku42 <dbaku42@gmail.com>
Date: Thu, 7 May 2026 00:38:46 +0200
Subject: [PATCH 07/38] Fix custom clustering module metadata

---
 modules/nf-core/custom/clustermetrics/meta.yml      | 2 +-
 modules/nf-core/custom/clustervisualiation/meta.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/nf-core/custom/clustermetrics/meta.yml b/modules/nf-core/custom/clustermetrics/meta.yml
index 2944931ae49c..a467cc634cba 100644
--- a/modules/nf-core/custom/clustermetrics/meta.yml
+++ b/modules/nf-core/custom/clustermetrics/meta.yml
@@ -1,4 +1,4 @@
-name: "clustermetrics"
+name: "CLUSTER_METRICS"
 description: "Computes clustering quality metrics (silhouette, Calinski-Harabasz,
   Davies-Bouldin) and performs k-sweep analysis"
 keywords:
diff --git a/modules/nf-core/custom/clustervisualiation/meta.yml b/modules/nf-core/custom/clustervisualiation/meta.yml
index f08ba6bfbc35..9828a5c2a6bc 100644
--- a/modules/nf-core/custom/clustervisualiation/meta.yml
+++ b/modules/nf-core/custom/clustervisualiation/meta.yml
@@ -1,4 +1,4 @@
-name: "clustervisualiation"
+name: "CLUSTER_VIZ"
 description: "Generates PCA, UMAP and t-SNE visualizations colored by cluster"
 keywords:
   - clustering

From fdb1be424e843ca3dad4283951d30e9cdea25256 Mon Sep 17 00:00:00 2001
From: dbaku42 <dbaku42@gmail.com>
Date: Thu, 7 May 2026 13:14:16 +0200
Subject: [PATCH 08/38] Add Dockerfile for custom/clustermetrics and
 custom/clustervisualiation

---
 modules/nf-core/custom/clustermetrics/Dockerfile      | 8 ++++++++
 modules/nf-core/custom/clustervisualiation/Dockerfile | 8 ++++++++
 2 files changed, 16 insertions(+)
 create mode 100644 modules/nf-core/custom/clustermetrics/Dockerfile
 create mode 100644 modules/nf-core/custom/clustervisualiation/Dockerfile

diff --git a/modules/nf-core/custom/clustermetrics/Dockerfile b/modules/nf-core/custom/clustermetrics/Dockerfile
new file mode 100644
index 000000000000..1fd701bd86f8
--- /dev/null
+++ b/modules/nf-core/custom/clustermetrics/Dockerfile
@@ -0,0 +1,8 @@
+FROM nfcore/base:2.0
+
+LABEL authors="dbaku42" \
+      description="Docker image containing all requirements for nf-core/custom/clustermetrics"
+
+COPY environment.yml /
+RUN micromamba install -y -n base -f /environment.yml && \
+    micromamba clean -a -y
diff --git a/modules/nf-core/custom/clustervisualiation/Dockerfile b/modules/nf-core/custom/clustervisualiation/Dockerfile
new file mode 100644
index 000000000000..577062b77918
--- /dev/null
+++ b/modules/nf-core/custom/clustervisualiation/Dockerfile
@@ -0,0 +1,8 @@
+FROM nfcore/base:2.0
+
+LABEL authors="dbaku42" \
+      description="Docker image containing all requirements for nf-core/custom/clustervisualiation"
+
+COPY environment.yml /
+RUN micromamba install -y -n base -f /environment.yml && \
+    micromamba clean -a -y

From d3a382b5eb317f400f36ec37a1b518f8877b59d5 Mon Sep 17 00:00:00 2001
From: dbaku42 <dbaku42@gmail.com>
Date: Thu, 7 May 2026 13:38:58 +0200
Subject: [PATCH 09/38] Add container directive for custom/clustermetrics and
 clustervisualiation

---
 modules/nf-core/custom/clustermetrics/main.nf      | 4 +++-
 modules/nf-core/custom/clustervisualiation/main.nf | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/modules/nf-core/custom/clustermetrics/main.nf b/modules/nf-core/custom/clustermetrics/main.nf
index 101f3cc78e84..b157d90bfa22 100644
--- a/modules/nf-core/custom/clustermetrics/main.nf
+++ b/modules/nf-core/custom/clustermetrics/main.nf
@@ -13,7 +13,9 @@ process CLUSTER_METRICS {
     tag "$meta.id"
     label 'process_medium'
     conda "${moduleDir}/environment.yml"
-
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/clustermetrics:dev' :
+        'quay.io/nf-core/clustermetrics:dev' }"
     input:
     tuple val(meta), path(features), path(clusters)
     val out_prefix
diff --git a/modules/nf-core/custom/clustervisualiation/main.nf b/modules/nf-core/custom/clustervisualiation/main.nf
index 58cb89db8142..0c8259de5ceb 100644
--- a/modules/nf-core/custom/clustervisualiation/main.nf
+++ b/modules/nf-core/custom/clustervisualiation/main.nf
@@ -13,7 +13,9 @@ process CLUSTER_VIZ {
     tag "$meta.id"
     label 'process_medium'
     conda "${moduleDir}/environment.yml"
-
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+    'https://depot.galaxyproject.org/singularity/clustervisualiation:dev' :
+    'quay.io/nf-core/clustervisualiation:dev' }"
     input:
     tuple val(meta), path(features), path(clusters), path(pca_scores)
     val out_prefix

From 89287dbffc9c3dd33a9812bed3625095e1c9c013 Mon Sep 17 00:00:00 2001
From: Donald Baku <141358602+dbaku42@users.noreply.github.com>
Date: Fri, 8 May 2026 13:06:10 +0200
Subject: [PATCH 10/38] Update modules/nf-core/custom/clustermetrics/main.nf

Co-authored-by: Jonathan Manning <pininforthefjords@gmail.com>
---
 modules/nf-core/custom/clustermetrics/main.nf | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/modules/nf-core/custom/clustermetrics/main.nf b/modules/nf-core/custom/clustermetrics/main.nf
index b157d90bfa22..4a50b288f49c 100644
--- a/modules/nf-core/custom/clustermetrics/main.nf
+++ b/modules/nf-core/custom/clustermetrics/main.nf
@@ -1,15 +1,4 @@
-#!/usr/bin/env nextflow
-nextflow.enable.dsl = 2
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    PROCESS: CLUSTER_METRICS
-    Compute clustering quality metrics and k-sweep
-    Author: Donald Baku (author)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-process CLUSTER_METRICS {
+process CUSTOM_CLUSTERMETRICS {
     tag "$meta.id"
     label 'process_medium'
     conda "${moduleDir}/environment.yml"

From d5ac668b057e90d6a42ebf9381e126b955299e00 Mon Sep 17 00:00:00 2001
From: Donald Baku <141358602+dbaku42@users.noreply.github.com>
Date: Fri, 8 May 2026 13:06:24 +0200
Subject: [PATCH 11/38] Update modules/nf-core/custom/clustermetrics/main.nf

Co-authored-by: Jonathan Manning <pininforthefjords@gmail.com>
---
 modules/nf-core/custom/clustermetrics/main.nf | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/nf-core/custom/clustermetrics/main.nf b/modules/nf-core/custom/clustermetrics/main.nf
index 4a50b288f49c..a15db53eeae1 100644
--- a/modules/nf-core/custom/clustermetrics/main.nf
+++ b/modules/nf-core/custom/clustermetrics/main.nf
@@ -2,9 +2,9 @@ process CUSTOM_CLUSTERMETRICS {
     tag "$meta.id"
     label 'process_medium'
     conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/clustermetrics:dev' :
-        'quay.io/nf-core/clustermetrics:dev' }"
+    container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/69/69a6d33f6bd1a901cad8a6914b6ad11a7db6c35005b4ff8604f20f1baba10fc3/data' :
+        'community.wave.seqera.io/library/matplotlib_pandas_python_scikit-learn:b7d7028d28dc4084' }"
     input:
     tuple val(meta), path(features), path(clusters)
     val out_prefix

From 55e868a733c2579d497ae98248ae143c68f06d5d Mon Sep 17 00:00:00 2001
From: Donald Baku <141358602+dbaku42@users.noreply.github.com>
Date: Fri, 8 May 2026 13:06:39 +0200
Subject: [PATCH 12/38] Update modules/nf-core/custom/clustermetrics/main.nf

Co-authored-by: Jonathan Manning <pininforthefjords@gmail.com>
---
 modules/nf-core/custom/clustermetrics/main.nf | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/modules/nf-core/custom/clustermetrics/main.nf b/modules/nf-core/custom/clustermetrics/main.nf
index a15db53eeae1..23b9447dd36c 100644
--- a/modules/nf-core/custom/clustermetrics/main.nf
+++ b/modules/nf-core/custom/clustermetrics/main.nf
@@ -20,19 +20,18 @@ process CUSTOM_CLUSTERMETRICS {
     task.ext.when == null || task.ext.when
 
     script:
-    def prefix = task.ext.prefix ?: out_prefix ?: "${meta.id}"
+    template 'cluster_metrics.py'
 
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
     """
-    python3 ${moduleDir}/templates/cluster_metrics.py \\
-        --features ${features} \\
-        --clusters ${clusters} \\
-        --out-k-sweep ${prefix}_k_sweep.csv \\
-        --out-selected ${prefix}_selected.json \\
-        --out-prefix ${prefix}
+    touch ${prefix}_metrics.tsv
+    touch ${prefix}_k_sweep.csv
+    touch ${prefix}_selected.json
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        python: \$(python3 --version | cut -d' ' -f2)
+        python: \$(python3 --version | sed 's/Python //')
         pandas: \$(python3 -c "import pandas; print(pandas.__version__)")
         scikit-learn: \$(python3 -c "import sklearn; print(sklearn.__version__)")
         matplotlib: \$(python3 -c "import matplotlib; print(matplotlib.__version__)")

From e492edbd7b876c38398be07081f4f231e775c9f9 Mon Sep 17 00:00:00 2001
From: Donald Baku <141358602+dbaku42@users.noreply.github.com>
Date: Fri, 8 May 2026 13:06:48 +0200
Subject: [PATCH 13/38] Update modules/nf-core/custom/clustermetrics/main.nf

Co-authored-by: Jonathan Manning <pininforthefjords@gmail.com>
---
 modules/nf-core/custom/clustermetrics/main.nf | 1 -
 1 file changed, 1 deletion(-)

diff --git a/modules/nf-core/custom/clustermetrics/main.nf b/modules/nf-core/custom/clustermetrics/main.nf
index 23b9447dd36c..1c8fe203fc9f 100644
--- a/modules/nf-core/custom/clustermetrics/main.nf
+++ b/modules/nf-core/custom/clustermetrics/main.nf
@@ -7,7 +7,6 @@ process CUSTOM_CLUSTERMETRICS {
         'community.wave.seqera.io/library/matplotlib_pandas_python_scikit-learn:b7d7028d28dc4084' }"
     input:
     tuple val(meta), path(features), path(clusters)
-    val out_prefix
 
     output:
     tuple val(meta), path("*_metrics.tsv")     , emit: metrics

From b8529f67d7b3f054d86a4506f105562906c3d1cd Mon Sep 17 00:00:00 2001
From: Donald Baku <141358602+dbaku42@users.noreply.github.com>
Date: Fri, 8 May 2026 13:07:02 +0200
Subject: [PATCH 14/38] Update
 modules/nf-core/custom/clustervisualiation/templates/cluster_viz.py

Co-authored-by: Jonathan Manning <pininforthefjords@gmail.com>
---
 .../custom/clustervisualiation/templates/cluster_viz.py  | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/modules/nf-core/custom/clustervisualiation/templates/cluster_viz.py b/modules/nf-core/custom/clustervisualiation/templates/cluster_viz.py
index 020a65db986e..7d470fc2a303 100644
--- a/modules/nf-core/custom/clustervisualiation/templates/cluster_viz.py
+++ b/modules/nf-core/custom/clustervisualiation/templates/cluster_viz.py
@@ -68,11 +68,10 @@ def load_features(path: str) -> tuple[pd.DataFrame, pd.Series]:
 
 
 def load_clusters(path: str) -> pd.Series:
-    df = pd.read_csv(path, sep=",", dtype=str)
-    df = _normalise_id_column(df)
-    if "cluster" not in df.columns:
-        raise ValueError("clusters CSV must have a 'cluster' column")
-    return df.set_index("sample_id")["cluster"].astype(int)
+    df = pd.read_csv(path)
+    if "sample_id" not in df.columns or "cluster" not in df.columns:
+        raise ValueError(f"clusters file must have 'sample_id' and 'cluster' columns. Found: {list(df.columns)}")
+    return df.set_index(df["sample_id"].astype(str))["cluster"].astype(int)
 
 
 def safe_perplexity(n_samples: int, requested: float) -> float:

From 12bb00d90f449303a0afe87fc8bace8f8a35a87d Mon Sep 17 00:00:00 2001
From: Donald Baku <141358602+dbaku42@users.noreply.github.com>
Date: Fri, 8 May 2026 13:07:25 +0200
Subject: [PATCH 15/38] Update
 modules/nf-core/custom/clustervisualiation/templates/cluster_viz.py

Co-authored-by: Jonathan Manning <pininforthefjords@gmail.com>
---
 .../templates/cluster_viz.py                  | 25 ++++++-------------
 1 file changed, 7 insertions(+), 18 deletions(-)

diff --git a/modules/nf-core/custom/clustervisualiation/templates/cluster_viz.py b/modules/nf-core/custom/clustervisualiation/templates/cluster_viz.py
index 7d470fc2a303..108d19f80e3a 100644
--- a/modules/nf-core/custom/clustervisualiation/templates/cluster_viz.py
+++ b/modules/nf-core/custom/clustervisualiation/templates/cluster_viz.py
@@ -82,24 +82,13 @@ def safe_perplexity(n_samples: int, requested: float) -> float:
 
 
 def compute_umap(x: np.ndarray, n_neighbors: int, min_dist: float) -> np.ndarray:
-    try:
-        import umap
-
-        return umap.UMAP(
-            n_components=2,
-            n_neighbors=n_neighbors,
-            min_dist=min_dist,
-            random_state=42,
-        ).fit_transform(x)
-    except Exception as e:
-        print(f"[WARN] UMAP failed, fallback to first 2 feature columns: {e}")
-        if x.shape[1] >= 2:
-            return x[:, :2]
-        elif x.shape[1] == 1:
-            return np.column_stack([x[:, 0], np.zeros(x.shape[0])])
-        else:
-            return np.zeros((x.shape[0], 2))
-
+    import umap
+    return umap.UMAP(
+        n_components=2,
+        n_neighbors=n_neighbors,
+        min_dist=min_dist,
+        random_state=42,
+    ).fit_transform(x)
 
 def compute_tsne(x: np.ndarray, perplexity: float, max_iter: int) -> np.ndarray:
     return TSNE(

From a4a01a388918ae5fe546e180f523d2fdb5e3b8b9 Mon Sep 17 00:00:00 2001
From: Donald Baku <141358602+dbaku42@users.noreply.github.com>
Date: Fri, 8 May 2026 13:07:37 +0200
Subject: [PATCH 16/38] Update
 modules/nf-core/custom/clustervisualiation/templates/cluster_viz.py

Co-authored-by: Jonathan Manning <pininforthefjords@gmail.com>
---
 .../clustervisualiation/templates/cluster_viz.py       | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/modules/nf-core/custom/clustervisualiation/templates/cluster_viz.py b/modules/nf-core/custom/clustervisualiation/templates/cluster_viz.py
index 108d19f80e3a..7d757ca26873 100644
--- a/modules/nf-core/custom/clustervisualiation/templates/cluster_viz.py
+++ b/modules/nf-core/custom/clustervisualiation/templates/cluster_viz.py
@@ -194,16 +194,6 @@ def main() -> None:
     tsne_df.to_csv(args.out_tsne_tsv, sep="\t", index=False)
     plot_scatter(tsne_df, "x", "y", args.out_tsne_png, f"t-SNE (perplexity={perp:.1f})")
 
-    pca_df = pd.read_csv(args.pca_scores, sep=r"\s+", engine="python", dtype=str)
-    pca_df = _normalise_id_column(pca_df)
-    comp_cols = [c for c in pca_df.columns if c != "sample_id"]
-    if len(comp_cols) < 2:
-        raise ValueError("pca_scores must have at least 2 PC columns")
-    c1, c2 = comp_cols[0], comp_cols[1]
-    for col in [c1, c2]:
-        pca_df[col] = pd.to_numeric(pca_df[col], errors="coerce")
-    merged = pca_df.merge(umap_df[["sample_id", "cluster"]], on="sample_id", how="inner")
-    plot_scatter(merged, c1, c2, args.out_pca_png, "PCA", c1, c2)
 
 
 if __name__ == "__main__":

From 060eb5da79477b5f845e1b985ff68ed2bb6198f3 Mon Sep 17 00:00:00 2001
From: dbaku42 <dbaku42@gmail.com>
Date: Fri, 8 May 2026 13:25:32 +0200
Subject: [PATCH 17/38] fix: use template for cluster visualization module

---
 .../custom/clustervisualiation/main.nf        | 22 +----------
 .../templates/cluster_viz.py                  | 37 +++++++++++++++++++
 2 files changed, 38 insertions(+), 21 deletions(-)

diff --git a/modules/nf-core/custom/clustervisualiation/main.nf b/modules/nf-core/custom/clustervisualiation/main.nf
index 0c8259de5ceb..c2682992c436 100644
--- a/modules/nf-core/custom/clustervisualiation/main.nf
+++ b/modules/nf-core/custom/clustervisualiation/main.nf
@@ -32,26 +32,6 @@ process CLUSTER_VIZ {
     task.ext.when == null || task.ext.when
 
     script:
-    def prefix = task.ext.prefix ?: out_prefix ?: "${meta.id}"
+    template 'cluster_viz.py'
 
-    """
-    python3 ${moduleDir}/templates/cluster_viz.py \\
-        --features ${features} \\
-        --clusters ${clusters} \\
-        --pca-scores ${pca_scores} \\
-        --out-umap-tsv ${prefix}_umap.tsv \\
-        --out-tsne-tsv ${prefix}_tsne.tsv \\
-        --out-umap-png ${prefix}_umap.png \\
-        --out-tsne-png ${prefix}_tsne.png \\
-        --out-pca-png ${prefix}_pca.png
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        python: \$(python3 --version | cut -d' ' -f2)
-        pandas: \$(python3 -c "import pandas; print(pandas.__version__)")
-        scikit-learn: \$(python3 -c "import sklearn; print(sklearn.__version__)")
-        umap-learn: \$(python3 -c "import umap; print(umap.__version__)" 2>/dev/null || echo 'N/A')
-        matplotlib: \$(python3 -c "import matplotlib; print(matplotlib.__version__)")
-    END_VERSIONS
-    """
 }
diff --git a/modules/nf-core/custom/clustervisualiation/templates/cluster_viz.py b/modules/nf-core/custom/clustervisualiation/templates/cluster_viz.py
index 7d757ca26873..63f6b0c24a10 100644
--- a/modules/nf-core/custom/clustervisualiation/templates/cluster_viz.py
+++ b/modules/nf-core/custom/clustervisualiation/templates/cluster_viz.py
@@ -197,4 +197,41 @@ def main() -> None:
 
 
 if __name__ == "__main__":
+    import sys
+    import platform
+
+    prefix = "${task.ext.prefix ?: out_prefix ?: meta.id}"
+
+    sys.argv = [
+        "cluster_viz.py",
+        "--features", "$features",
+        "--clusters", "$clusters",
+        "--pca-scores", "$pca_scores",
+        "--out-umap-tsv", f"{prefix}_umap.tsv",
+        "--out-tsne-tsv", f"{prefix}_tsne.tsv",
+        "--out-umap-png", f"{prefix}_umap.png",
+        "--out-tsne-png", f"{prefix}_tsne.png",
+        "--out-pca-png", f"{prefix}_pca.png",
+    ]
+
     main()
+
+    import matplotlib
+    import pandas
+    import sklearn
+
+    try:
+        import umap
+        umap_version = umap.__version__
+    except Exception:
+        umap_version = "N/A"
+
+    with open("versions.yml", "w") as f:
+        f.write(
+            f'"${task.process}":\n'
+            f'    python: {platform.python_version()}\n'
+            f'    pandas: {pandas.__version__}\n'
+            f'    scikit-learn: {sklearn.__version__}\n'
+            f'    umap-learn: {umap_version}\n'
+            f'    matplotlib: {matplotlib.__version__}\n'
+        )

From d48a224c86207fb8b5ccc16c04ad981fbc0a4e7c Mon Sep 17 00:00:00 2001
From: dbaku42 <dbaku42@gmail.com>
Date: Fri, 8 May 2026 14:01:11 +0200
Subject: [PATCH 18/38] style: clean cluster visualization module main

---
 modules/nf-core/custom/clustervisualiation/main.nf  | 13 +------------
 modules/nf-core/custom/clustervisualiation/meta.yml |  2 +-
 2 files changed, 2 insertions(+), 13 deletions(-)

diff --git a/modules/nf-core/custom/clustervisualiation/main.nf b/modules/nf-core/custom/clustervisualiation/main.nf
index c2682992c436..cd67ff46275b 100644
--- a/modules/nf-core/custom/clustervisualiation/main.nf
+++ b/modules/nf-core/custom/clustervisualiation/main.nf
@@ -1,15 +1,4 @@
-#!/usr/bin/env nextflow
-nextflow.enable.dsl = 2
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    PROCESS: CLUSTER_VIZ
-    Generates PCA, UMAP and t-SNE visualizations colored by cluster
-    Author: Donald Baku (author)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-process CLUSTER_VIZ {
+process CUSTOM_CLUSTERVISUALIATION {
     tag "$meta.id"
     label 'process_medium'
     conda "${moduleDir}/environment.yml"
diff --git a/modules/nf-core/custom/clustervisualiation/meta.yml b/modules/nf-core/custom/clustervisualiation/meta.yml
index 9828a5c2a6bc..63bcd13f76ec 100644
--- a/modules/nf-core/custom/clustervisualiation/meta.yml
+++ b/modules/nf-core/custom/clustervisualiation/meta.yml
@@ -1,4 +1,4 @@
-name: "CLUSTER_VIZ"
+name: "CUSTOM_CLUSTERVISUALIATION"
 description: "Generates PCA, UMAP and t-SNE visualizations colored by cluster"
 keywords:
   - clustering

From 2d85309ecc0d0e3ba3dba76defc65f12eae568c6 Mon Sep 17 00:00:00 2001
From: dbaku42 <dbaku42@gmail.com>
Date: Fri, 8 May 2026 16:38:41 +0200
Subject: [PATCH 19/38] fix: address reviewer feedback for cluster modules

---
 .../nf-core/custom/clustermetrics/Dockerfile  |  8 -----
 modules/nf-core/custom/clustermetrics/main.nf |  5 ++--
 .../nf-core/custom/clustermetrics/meta.yml    | 21 ++++++--------
 .../templates/cluster_metrics.py              | 29 +++++++++++++++++++
 .../custom/clustervisualiation/Dockerfile     |  8 -----
 .../custom/clustervisualiation/main.nf        |  8 ++---
 .../custom/clustervisualiation/meta.yml       |  3 --
 .../templates/cluster_viz.py                  |  2 +-
 .../clustervisualiation/tests/main.nf.test    |  4 +--
 9 files changed, 48 insertions(+), 40 deletions(-)
 delete mode 100644 modules/nf-core/custom/clustermetrics/Dockerfile
 delete mode 100644 modules/nf-core/custom/clustervisualiation/Dockerfile

diff --git a/modules/nf-core/custom/clustermetrics/Dockerfile b/modules/nf-core/custom/clustermetrics/Dockerfile
deleted file mode 100644
index 1fd701bd86f8..000000000000
--- a/modules/nf-core/custom/clustermetrics/Dockerfile
+++ /dev/null
@@ -1,8 +0,0 @@
-FROM nfcore/base:2.0
-
-LABEL authors="dbaku42" \
-      description="Docker image containing all requirements for nf-core/custom/clustermetrics"
-
-COPY environment.yml /
-RUN micromamba install -y -n base -f /environment.yml && \
-    micromamba clean -a -y
diff --git a/modules/nf-core/custom/clustermetrics/main.nf b/modules/nf-core/custom/clustermetrics/main.nf
index 1c8fe203fc9f..2d7a374ba295 100644
--- a/modules/nf-core/custom/clustermetrics/main.nf
+++ b/modules/nf-core/custom/clustermetrics/main.nf
@@ -3,8 +3,9 @@ process CUSTOM_CLUSTERMETRICS {
     label 'process_medium'
     conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
-        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/69/69a6d33f6bd1a901cad8a6914b6ad11a7db6c35005b4ff8604f20f1baba10fc3/data' :
-        'community.wave.seqera.io/library/matplotlib_pandas_python_scikit-learn:b7d7028d28dc4084' }"
+    'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c9/c993602a6f49b387b34e84f41fda5a393355850b2dd6ab776f1307a0e7b9d540/data' :
+    'community.wave.seqera.io/library/matplotlib_pandas_python_scikit-learn:c378d29780adbcbf' }"
+    
     input:
     tuple val(meta), path(features), path(clusters)
 
diff --git a/modules/nf-core/custom/clustermetrics/meta.yml b/modules/nf-core/custom/clustermetrics/meta.yml
index a467cc634cba..432d56069427 100644
--- a/modules/nf-core/custom/clustermetrics/meta.yml
+++ b/modules/nf-core/custom/clustermetrics/meta.yml
@@ -1,4 +1,4 @@
-name: "CLUSTER_METRICS"
+name: "CUSTOM_CLUSTERMETRICS"
 description: "Computes clustering quality metrics (silhouette, Calinski-Harabasz,
   Davies-Bouldin) and performs k-sweep analysis"
 keywords:
@@ -19,22 +19,19 @@ tools:
 input:
   - - meta:
         type: map
-        description: Groovy Map containing sample information
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1' ]`
     - features:
         type: file
-        description: TSV file with sample_id and numeric features (e.g. PCA
-          scores)
-        pattern: "*.tsv"
-        ontologies:
-          - edam: http://edamontology.org/format_3475
+        description: Feature matrix file
+        pattern: "*"
+        ontologies: []
     - clusters:
         type: file
-        description: CSV/TSV file with sample_id and cluster assignment
-        pattern: "*_clusters.*"
+        description: Cluster assignment file
+        pattern: "*"
         ontologies: []
-  - out_prefix:
-      type: string
-      description: Prefix for output files
 output:
   metrics:
     - - meta:
diff --git a/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py b/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py
index 13a417ac37fe..742df47b043d 100644
--- a/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py
+++ b/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py
@@ -282,4 +282,33 @@ def plot_curve(metric, title, ylabel, out_png):
 
 
 if __name__ == "__main__":
+    import sys
+    import platform
+
+    prefix = "${task.ext.prefix ?: meta.id}"
+
+    sys.argv = [
+        "cluster_metrics.py",
+        "--features", "$features",
+        "--clusters", "$clusters",
+        "--k-min", "2",
+        "--k-max", "12",
+        "--out-k-sweep", f"{prefix}_k_sweep.csv",
+        "--out-selected", f"{prefix}_selected.json",
+        "--out-prefix", prefix,
+    ]
+
     main()
+
+    import matplotlib
+    import pandas
+    import sklearn
+
+    with open("versions.yml", "w") as f:
+        f.write(
+            f'"${task.process}":\n'
+            f'    python: {platform.python_version()}\n'
+            f'    pandas: {pandas.__version__}\n'
+            f'    scikit-learn: {sklearn.__version__}\n'
+            f'    matplotlib: {matplotlib.__version__}\n'
+        )
diff --git a/modules/nf-core/custom/clustervisualiation/Dockerfile b/modules/nf-core/custom/clustervisualiation/Dockerfile
deleted file mode 100644
index 577062b77918..000000000000
--- a/modules/nf-core/custom/clustervisualiation/Dockerfile
+++ /dev/null
@@ -1,8 +0,0 @@
-FROM nfcore/base:2.0
-
-LABEL authors="dbaku42" \
-      description="Docker image containing all requirements for nf-core/custom/clustervisualiation"
-
-COPY environment.yml /
-RUN micromamba install -y -n base -f /environment.yml && \
-    micromamba clean -a -y
diff --git a/modules/nf-core/custom/clustervisualiation/main.nf b/modules/nf-core/custom/clustervisualiation/main.nf
index cd67ff46275b..a1d49f08dff3 100644
--- a/modules/nf-core/custom/clustervisualiation/main.nf
+++ b/modules/nf-core/custom/clustervisualiation/main.nf
@@ -2,12 +2,12 @@ process CUSTOM_CLUSTERVISUALIATION {
     tag "$meta.id"
     label 'process_medium'
     conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-    'https://depot.galaxyproject.org/singularity/clustervisualiation:dev' :
-    'quay.io/nf-core/clustervisualiation:dev' }"
+    container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
+    'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c0/c00b83d40a02e4ed2833ebf0d38635602231a21764eff0d30ed16885e5c02445/data' :
+    'community.wave.seqera.io/library/matplotlib_pandas_python_scikit-learn_umap-learn:2c4aaf377be5cd4a' }"
+
     input:
     tuple val(meta), path(features), path(clusters), path(pca_scores)
-    val out_prefix
 
     output:
     tuple val(meta), path("*_umap.tsv")   , emit: umap
diff --git a/modules/nf-core/custom/clustervisualiation/meta.yml b/modules/nf-core/custom/clustervisualiation/meta.yml
index 63bcd13f76ec..ebde48a4bbef 100644
--- a/modules/nf-core/custom/clustervisualiation/meta.yml
+++ b/modules/nf-core/custom/clustervisualiation/meta.yml
@@ -43,9 +43,6 @@ input:
         pattern: "*_pca_scores.tsv"
         ontologies:
           - edam: http://edamontology.org/format_3475
-  - out_prefix:
-      type: string
-      description: Prefix for output files
 output:
   umap:
     - - meta:
diff --git a/modules/nf-core/custom/clustervisualiation/templates/cluster_viz.py b/modules/nf-core/custom/clustervisualiation/templates/cluster_viz.py
index 63f6b0c24a10..df4d3913d758 100644
--- a/modules/nf-core/custom/clustervisualiation/templates/cluster_viz.py
+++ b/modules/nf-core/custom/clustervisualiation/templates/cluster_viz.py
@@ -200,7 +200,7 @@ def main() -> None:
     import sys
     import platform
 
-    prefix = "${task.ext.prefix ?: out_prefix ?: meta.id}"
+    prefix = "${task.ext.prefix ?: meta.id}"
 
     sys.argv = [
         "cluster_viz.py",
diff --git a/modules/nf-core/custom/clustervisualiation/tests/main.nf.test b/modules/nf-core/custom/clustervisualiation/tests/main.nf.test
index f4695eedae1e..43c55045ae1e 100644
--- a/modules/nf-core/custom/clustervisualiation/tests/main.nf.test
+++ b/modules/nf-core/custom/clustervisualiation/tests/main.nf.test
@@ -1,7 +1,7 @@
 nextflow_process {
-    name "Test Process CLUSTER_VIZ"
+    name "Test Process CUSTOM_CLUSTERVISUALIATION"
     script "../main.nf"
-    process "CLUSTER_VIZ"
+    process "CUSTOM_CLUSTERVISUALIATION"
     tag "modules"
     tag "modules_nfcore"
     tag "custom"

From 387cbebd2b89d9bba0c43996078ac9237cc4e297 Mon Sep 17 00:00:00 2001
From: dbaku42 <dbaku42@gmail.com>
Date: Mon, 11 May 2026 14:23:26 +0200
Subject: [PATCH 20/38] fix: address pinin4fjords follow-up review - template
 escaping, drop PCA orphans, fix versions.yml, rename clustervisualiation ->
 clustervisualization

---
 .gitignore                                    |  11 +
 modules/nf-core/custom/clustermetrics/main.nf |   6 +-
 .../templates/cluster_metrics.py              |  74 +++---
 .../custom/clustermetrics/tests/main.nf.test  |   4 +-
 .../clustermetrics/tests/main.nf.test.snap    |  38 +--
 .../custom/clustervisualiation/main.nf        |  26 --
 .../templates/cluster_viz.py                  | 237 ------------------
 .../clustervisualiation/tests/main.nf.test    |  54 ----
 .../tests/main.nf.test.snap                   | 151 -----------
 .../environment.yml                           |   0
 .../custom/clustervisualization/main.nf       |  43 ++++
 .../meta.yml                                  |   2 +-
 .../templates/cluster_viz.py                  | 229 +++++++++++++++++
 .../tests/data/test_clusters.csv              |   0
 .../tests/data/test_features.tsv              |   0
 .../tests/data/test_pca.eigenvec              |   0
 .../clustervisualization/tests/main.nf.test   |  47 ++++
 .../tests/main.nf.test.snap                   | 111 ++++++++
 18 files changed, 513 insertions(+), 520 deletions(-)
 delete mode 100644 modules/nf-core/custom/clustervisualiation/main.nf
 delete mode 100644 modules/nf-core/custom/clustervisualiation/templates/cluster_viz.py
 delete mode 100644 modules/nf-core/custom/clustervisualiation/tests/main.nf.test
 delete mode 100644 modules/nf-core/custom/clustervisualiation/tests/main.nf.test.snap
 rename modules/nf-core/custom/{clustervisualiation => clustervisualization}/environment.yml (100%)
 create mode 100644 modules/nf-core/custom/clustervisualization/main.nf
 rename modules/nf-core/custom/{clustervisualiation => clustervisualization}/meta.yml (98%)
 create mode 100644 modules/nf-core/custom/clustervisualization/templates/cluster_viz.py
 rename modules/nf-core/custom/{clustervisualiation => clustervisualization}/tests/data/test_clusters.csv (100%)
 rename modules/nf-core/custom/{clustervisualiation => clustervisualization}/tests/data/test_features.tsv (100%)
 rename modules/nf-core/custom/{clustervisualiation => clustervisualization}/tests/data/test_pca.eigenvec (100%)
 create mode 100644 modules/nf-core/custom/clustervisualization/tests/main.nf.test
 create mode 100644 modules/nf-core/custom/clustervisualization/tests/main.nf.test.snap

diff --git a/.gitignore b/.gitignore
index df5aafd3cc74..b9460e81b015 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,3 +15,14 @@ test_output/
 tests/data/
 work/
 .github/CODEOWNERS-tmp
+modules/local/
+vcf_data/
+subworkflows/nf-core/snpclustering/modules/
+subworkflows/nf-core/snpclustering/run.log
+subworkflows/nf-core/snpclustering/run_test.nf
+subworkflows/nf-core/snpclustering/test_local.nf
+subworkflows/nf-core/snpclustering/scripts/
+subworkflows/nf-core/snpclustering/Dockerfile
+subworkflows/nf-core/snpclustering/tests/
+subworkflows/nf-core/snpclustering/main.nf
+modules/nf-core/clustering/
diff --git a/modules/nf-core/custom/clustermetrics/main.nf b/modules/nf-core/custom/clustermetrics/main.nf
index 2d7a374ba295..e41e695f87f3 100644
--- a/modules/nf-core/custom/clustermetrics/main.nf
+++ b/modules/nf-core/custom/clustermetrics/main.nf
@@ -5,7 +5,7 @@ process CUSTOM_CLUSTERMETRICS {
     container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
     'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c9/c993602a6f49b387b34e84f41fda5a393355850b2dd6ab776f1307a0e7b9d540/data' :
     'community.wave.seqera.io/library/matplotlib_pandas_python_scikit-learn:c378d29780adbcbf' }"
-    
+
     input:
     tuple val(meta), path(features), path(clusters)
 
@@ -28,6 +28,10 @@ process CUSTOM_CLUSTERMETRICS {
     touch ${prefix}_metrics.tsv
     touch ${prefix}_k_sweep.csv
     touch ${prefix}_selected.json
+    touch ${prefix}_elbow.png
+    touch ${prefix}_silhouette.png
+    touch ${prefix}_davies_bouldin.png
+    touch ${prefix}_calinski.png
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py b/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py
index 742df47b043d..01236a37ce3f 100644
--- a/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py
+++ b/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py
@@ -2,11 +2,14 @@
 
 import argparse
 import json
+import platform
+import sys
 from pathlib import Path
 
 import matplotlib
 import numpy as np
 import pandas as pd
+import sklearn
 from sklearn.cluster import KMeans
 from sklearn.metrics import (
     calinski_harabasz_score,
@@ -17,6 +20,18 @@
 matplotlib.use("Agg")
 
 
+def format_yaml_like(data: dict, indent: int = 0) -> str:
+    """Formats a dictionary to a YAML-like string (nf-core standard)."""
+    yaml_str = ""
+    for key, value in data.items():
+        spaces = "  " * indent
+        if isinstance(value, dict):
+            yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}"
+        else:
+            yaml_str += f"{spaces}{key}: {value}\\n"
+    return yaml_str
+
+
 def _normalise_id_column(df: pd.DataFrame) -> pd.DataFrame:
     df = df.copy()
     df.columns = [str(c).lstrip("#") for c in df.columns]
@@ -190,15 +205,15 @@ def main() -> None:
             alignment_mode = "row_order_fallback"
         else:
             raise ValueError(
-                f"No overlapping sample_id between features and clusters.\n"
-                f"  features IDs (first 5): {sample_ids.head().tolist()}\n"
+                f"No overlapping sample_id between features and clusters.\\n"
+                f"  features IDs (first 5): {sample_ids.head().tolist()}\\n"
                 f"  clusters IDs (first 5): {list(clusters.index[:5])}"
             )
     else:
         if len(clusters_df) != len(sample_ids):
             raise ValueError(
-                "clusters CSV has no usable sample_id column and row counts do not match.\n"
-                f"  n_features={len(sample_ids)}\n"
+                "clusters CSV has no usable sample_id column and row counts do not match.\\n"
+                f"  n_features={len(sample_ids)}\\n"
                 f"  n_clusters={len(clusters_df)}"
             )
         x = x_df.values
@@ -278,37 +293,40 @@ def plot_curve(metric, title, ylabel, out_png):
             )
 
     except Exception as e:
-        Path("plot_warning.txt").write_text(f"Plotting failed: {e}\n")
+        Path("plot_warning.txt").write_text("Plotting failed: " + str(e) + "\\n")
+
+    # === VERSIONS.YML (fix review) ===
+    versions = {
+        "${task.process}": {
+            "python": platform.python_version(),
+            "pandas": pd.__version__,
+            "scikit-learn": sklearn.__version__,
+            "matplotlib": matplotlib.__version__,
+        }
+    }
+    with open("versions.yml", "w") as f:
+        f.write(format_yaml_like(versions))
 
 
 if __name__ == "__main__":
-    import sys
-    import platform
-
     prefix = "${task.ext.prefix ?: meta.id}"
 
     sys.argv = [
         "cluster_metrics.py",
-        "--features", "$features",
-        "--clusters", "$clusters",
-        "--k-min", "2",
-        "--k-max", "12",
-        "--out-k-sweep", f"{prefix}_k_sweep.csv",
-        "--out-selected", f"{prefix}_selected.json",
-        "--out-prefix", prefix,
+        "--features",
+        "$features",
+        "--clusters",
+        "$clusters",
+        "--k-min",
+        "2",
+        "--k-max",
+        "12",
+        "--out-k-sweep",
+        f"{prefix}_k_sweep.csv",
+        "--out-selected",
+        f"{prefix}_selected.json",
+        "--out-prefix",
+        prefix,
     ]
 
     main()
-
-    import matplotlib
-    import pandas
-    import sklearn
-
-    with open("versions.yml", "w") as f:
-        f.write(
-            f'"${task.process}":\n'
-            f'    python: {platform.python_version()}\n'
-            f'    pandas: {pandas.__version__}\n'
-            f'    scikit-learn: {sklearn.__version__}\n'
-            f'    matplotlib: {matplotlib.__version__}\n'
-        )
diff --git a/modules/nf-core/custom/clustermetrics/tests/main.nf.test b/modules/nf-core/custom/clustermetrics/tests/main.nf.test
index 351d1c00df79..1d6613fa3709 100644
--- a/modules/nf-core/custom/clustermetrics/tests/main.nf.test
+++ b/modules/nf-core/custom/clustermetrics/tests/main.nf.test
@@ -1,7 +1,7 @@
 nextflow_process {
     name "Test Process CLUSTER_METRICS"
     script "../main.nf"
-    process "CLUSTER_METRICS"
+    process "CUSTOM_CLUSTERMETRICS"
     tag "modules"
     tag "modules_nfcore"
     tag "custom"
@@ -13,7 +13,6 @@ nextflow_process {
             process {
                 """
                 input[0] = [ [id:'test'], file("${projectDir}/modules/nf-core/custom/clustermetrics/tests/data/test_features.tsv", checkIfExists: true), file("${projectDir}/modules/nf-core/custom/clustermetrics/tests/data/test_clusters.csv", checkIfExists: true) ]
-                input[1] = 'test'
                 """
             }
         }
@@ -34,7 +33,6 @@ nextflow_process {
             process {
                 """
                 input[0] = [ [id:'test'], file("${projectDir}/modules/nf-core/custom/clustermetrics/tests/data/test_features.tsv", checkIfExists: true), file("${projectDir}/modules/nf-core/custom/clustermetrics/tests/data/test_clusters.csv", checkIfExists: true) ]
-                input[1] = 'test'
                 """
             }
         }
diff --git a/modules/nf-core/custom/clustermetrics/tests/main.nf.test.snap b/modules/nf-core/custom/clustermetrics/tests/main.nf.test.snap
index 1e28bf786221..15deaf982452 100644
--- a/modules/nf-core/custom/clustermetrics/tests/main.nf.test.snap
+++ b/modules/nf-core/custom/clustermetrics/tests/main.nf.test.snap
@@ -26,10 +26,10 @@
                 ]
             ],
             [
-                "versions.yml:md5,00e23b98be698c459e9c94079b0164e0"
+                "versions.yml:md5,236501fe75ac914d4de40a2c42dbec6b"
             ]
         ],
-        "timestamp": "2026-05-06T17:29:16.510271878",
+        "timestamp": "2026-05-11T13:08:00.102276222",
         "meta": {
             "nf-test": "0.9.5",
             "nextflow": "25.09.0"
@@ -43,7 +43,7 @@
                         {
                             "id": "test"
                         },
-                        "test_metrics.tsv:md5,7bfdac9bca90ba2ea3c03eca25b24f28"
+                        "test_metrics.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
                     ]
                 ],
                 "1": [
@@ -51,7 +51,7 @@
                         {
                             "id": "test"
                         },
-                        "test_k_sweep.csv:md5,8c382a00d959ae5d6e19f42cf9278f37"
+                        "test_k_sweep.csv:md5,d41d8cd98f00b204e9800998ecf8427e"
                     ]
                 ],
                 "2": [
@@ -59,7 +59,7 @@
                         {
                             "id": "test"
                         },
-                        "test_selected.json:md5,633493b1585fb0ec0a81629bde4c00cb"
+                        "test_selected.json:md5,d41d8cd98f00b204e9800998ecf8427e"
                     ]
                 ],
                 "3": [
@@ -68,22 +68,22 @@
                             "id": "test"
                         },
                         [
-                            "test_calinski.png:md5,a5db5a4340f5a203f8cfb1e27617eab1",
-                            "test_davies_bouldin.png:md5,b083077c690cc454c7094d6d0c72fdb9",
-                            "test_elbow.png:md5,e90e14974deb0776c9b784c1de8b8ef0",
-                            "test_silhouette.png:md5,b7f103711912855f97d8588e1be2d659"
+                            "test_calinski.png:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test_davies_bouldin.png:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test_elbow.png:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test_silhouette.png:md5,d41d8cd98f00b204e9800998ecf8427e"
                         ]
                     ]
                 ],
                 "4": [
-                    "versions.yml:md5,00e23b98be698c459e9c94079b0164e0"
+                    "versions.yml:md5,48951b680bf5fe7b6b0242dd160a2618"
                 ],
                 "k_sweep": [
                     [
                         {
                             "id": "test"
                         },
-                        "test_k_sweep.csv:md5,8c382a00d959ae5d6e19f42cf9278f37"
+                        "test_k_sweep.csv:md5,d41d8cd98f00b204e9800998ecf8427e"
                     ]
                 ],
                 "metrics": [
@@ -91,7 +91,7 @@
                         {
                             "id": "test"
                         },
-                        "test_metrics.tsv:md5,7bfdac9bca90ba2ea3c03eca25b24f28"
+                        "test_metrics.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
                     ]
                 ],
                 "plots": [
@@ -100,10 +100,10 @@
                             "id": "test"
                         },
                         [
-                            "test_calinski.png:md5,a5db5a4340f5a203f8cfb1e27617eab1",
-                            "test_davies_bouldin.png:md5,b083077c690cc454c7094d6d0c72fdb9",
-                            "test_elbow.png:md5,e90e14974deb0776c9b784c1de8b8ef0",
-                            "test_silhouette.png:md5,b7f103711912855f97d8588e1be2d659"
+                            "test_calinski.png:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test_davies_bouldin.png:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test_elbow.png:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "test_silhouette.png:md5,d41d8cd98f00b204e9800998ecf8427e"
                         ]
                     ]
                 ],
@@ -112,15 +112,15 @@
                         {
                             "id": "test"
                         },
-                        "test_selected.json:md5,633493b1585fb0ec0a81629bde4c00cb"
+                        "test_selected.json:md5,d41d8cd98f00b204e9800998ecf8427e"
                     ]
                 ],
                 "versions": [
-                    "versions.yml:md5,00e23b98be698c459e9c94079b0164e0"
+                    "versions.yml:md5,48951b680bf5fe7b6b0242dd160a2618"
                 ]
             }
         ],
-        "timestamp": "2026-05-06T17:29:21.180634202",
+        "timestamp": "2026-05-11T13:05:52.932850421",
         "meta": {
             "nf-test": "0.9.5",
             "nextflow": "25.09.0"
diff --git a/modules/nf-core/custom/clustervisualiation/main.nf b/modules/nf-core/custom/clustervisualiation/main.nf
deleted file mode 100644
index a1d49f08dff3..000000000000
--- a/modules/nf-core/custom/clustervisualiation/main.nf
+++ /dev/null
@@ -1,26 +0,0 @@
-process CUSTOM_CLUSTERVISUALIATION {
-    tag "$meta.id"
-    label 'process_medium'
-    conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
-    'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c0/c00b83d40a02e4ed2833ebf0d38635602231a21764eff0d30ed16885e5c02445/data' :
-    'community.wave.seqera.io/library/matplotlib_pandas_python_scikit-learn_umap-learn:2c4aaf377be5cd4a' }"
-
-    input:
-    tuple val(meta), path(features), path(clusters), path(pca_scores)
-
-    output:
-    tuple val(meta), path("*_umap.tsv")   , emit: umap
-    tuple val(meta), path("*_tsne.tsv")   , emit: tsne
-    tuple val(meta), path("*_umap.png")   , emit: umap_png
-    tuple val(meta), path("*_tsne.png")   , emit: tsne_png
-    tuple val(meta), path("*_pca.png")    , emit: pca_png
-    path "versions.yml"                   , emit: versions, topic: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    template 'cluster_viz.py'
-
-}
diff --git a/modules/nf-core/custom/clustervisualiation/templates/cluster_viz.py b/modules/nf-core/custom/clustervisualiation/templates/cluster_viz.py
deleted file mode 100644
index df4d3913d758..000000000000
--- a/modules/nf-core/custom/clustervisualiation/templates/cluster_viz.py
+++ /dev/null
@@ -1,237 +0,0 @@
-#!/usr/bin/env python3
-
-"""Cluster visualizations.
-
-Produces three 2D plots, all colored by cluster label:
-  - PCA (first two columns from pca_scores)
-  - UMAP (computed on the feature matrix used for clustering)
-  - t-SNE (computed on the feature matrix used for clustering)
-
-Also writes UMAP and t-SNE coordinates to TSV.
-"""
-
-import argparse
-
-import numpy as np
-import pandas as pd
-from sklearn.manifold import TSNE
-
-
-def _normalise_id_column(df: pd.DataFrame) -> pd.DataFrame:
-    """
-    Handles the header formats that FlashPCA/PLINK2 produces:
-      - '#IID' (PLINK2 eigenvec: leading hash on first column)
-      - 'IID'  (FlashPCA / older PLINK)
-      - 'FID', 'IID' (two-column prefix)
-      - 'sample_id' (already normalised)
-    """
-    # Strip leading '#' (PLINK2 eigenvec writes '#IID' as the first column)
-    df = df.rename(columns=lambda c: c.lstrip("#"))
-
-    cols_upper = {c.upper(): c for c in df.columns}
-
-    # Remove duplicate header row (IID value == "FID" or "IID")
-    if "IID" in cols_upper:
-        iid_col = cols_upper["IID"]
-        dup_mask = df[iid_col].str.upper().isin({"FID", "IID"})
-        if dup_mask.any():
-            df = df[~dup_mask].copy().reset_index(drop=True)
-
-    cols_upper = {c.upper(): c for c in df.columns}
-
-    if "SAMPLE_ID" in cols_upper:
-        return df
-
-    if "IID" in cols_upper:
-        iid_col = cols_upper["IID"]
-        iid_numeric = pd.to_numeric(df[iid_col], errors="coerce").notna().all()
-        if iid_numeric:
-            df = df.drop(columns=[iid_col])
-            df = df.rename(columns={df.columns[0]: "sample_id"})
-        else:
-            df = df.rename(columns={iid_col: "sample_id"})
-
-        fid_cols = [c for c in df.columns if c.upper() == "FID"]
-        if fid_cols:
-            df = df.drop(columns=fid_cols)
-        return df
-
-    raise ValueError(f"Cannot find sample ID column (expected 'sample_id' or 'IID'). Found: {list(df.columns)}")
-
-
-def load_features(path: str) -> tuple[pd.DataFrame, pd.Series]:
-    df = pd.read_csv(path, sep=r"\s+", engine="python", dtype=str)
-    df = _normalise_id_column(df)
-    sample_ids = df["sample_id"].astype(str)
-    x = df.drop(columns=["sample_id"]).apply(pd.to_numeric, errors="coerce").fillna(0.0)
-    return x, sample_ids
-
-
-def load_clusters(path: str) -> pd.Series:
-    df = pd.read_csv(path)
-    if "sample_id" not in df.columns or "cluster" not in df.columns:
-        raise ValueError(f"clusters file must have 'sample_id' and 'cluster' columns. Found: {list(df.columns)}")
-    return df.set_index(df["sample_id"].astype(str))["cluster"].astype(int)
-
-
-def safe_perplexity(n_samples: int, requested: float) -> float:
-    if n_samples <= 3:
-        return 1.0
-    upper = (n_samples - 1) / 3.0
-    return float(max(2.0, min(requested, upper)))
-
-
-def compute_umap(x: np.ndarray, n_neighbors: int, min_dist: float) -> np.ndarray:
-    import umap
-    return umap.UMAP(
-        n_components=2,
-        n_neighbors=n_neighbors,
-        min_dist=min_dist,
-        random_state=42,
-    ).fit_transform(x)
-
-def compute_tsne(x: np.ndarray, perplexity: float, max_iter: int) -> np.ndarray:
-    return TSNE(
-        n_components=2,
-        perplexity=perplexity,
-        init="pca",
-        random_state=42,
-        max_iter=max_iter,
-        learning_rate="auto",
-    ).fit_transform(x)
-
-
-def plot_scatter(df, x, y, out_png, title, xlabel=None, ylabel=None):
-    import matplotlib.pyplot as plt
-    from matplotlib.lines import Line2D
-
-    plt.figure(figsize=(7, 5))
-    labels = df["cluster"].astype(int).values
-    uniq = np.unique(labels)
-    sc = plt.scatter(df[x], df[y], c=labels, cmap="Paired", s=24, linewidths=0.4, alpha=0.85)
-    plt.title(title)
-    plt.xlabel(xlabel or x)
-    plt.ylabel(ylabel or y)
-    plt.grid(True, alpha=0.5)
-    handles = [
-        Line2D(
-            [0],
-            [0],
-            marker="o",
-            linestyle="",
-            markersize=7,
-            markerfacecolor=sc.cmap(sc.norm(k)),
-            markeredgecolor="none",
-            label=f"Cluster {k}",
-        )
-        for k in uniq
-    ]
-    plt.legend(
-        handles=handles,
-        title="Clusters",
-        loc="center left",
-        bbox_to_anchor=(1.02, 0.5),
-        borderaxespad=0.0,
-        frameon=True,
-    )
-    plt.tight_layout()
-    plt.savefig(out_png, dpi=200, bbox_inches="tight")
-    plt.close()
-
-
-def main() -> None:
-    ap = argparse.ArgumentParser(description="PCA + UMAP + t-SNE plots colored by cluster")
-    ap.add_argument("--features", required=True)
-    ap.add_argument("--clusters", required=True)
-    ap.add_argument("--pca-scores", required=True)
-    ap.add_argument("--tsne-perplexity", type=float, default=30.0)
-    ap.add_argument("--tsne-iter", type=int, default=1000)
-    ap.add_argument("--umap-neighbors", type=int, default=15)
-    ap.add_argument("--umap-min-dist", type=float, default=0.1)
-    ap.add_argument("--out-umap-tsv", required=True)
-    ap.add_argument("--out-tsne-tsv", required=True)
-    ap.add_argument("--out-umap-png", required=True)
-    ap.add_argument("--out-tsne-png", required=True)
-    ap.add_argument("--out-pca-png", required=True)
-    args = ap.parse_args()
-
-    x_df, sample_ids = load_features(args.features)
-    clusters = load_clusters(args.clusters)
-
-    common = sample_ids[sample_ids.isin(clusters.index)]
-    if len(common) == 0:
-        raise ValueError(
-            f"No overlapping sample_id between features and clusters.\n"
-            f"  features IDs (first 5): {sample_ids.head().tolist()}\n"
-            f"  clusters IDs (first 5): {list(clusters.index[:5])}"
-        )
-
-    x = x_df.loc[common.index].values
-    y = clusters.loc[common.values].values
-
-    umap_coords = compute_umap(x, args.umap_neighbors, args.umap_min_dist)
-    umap_df = pd.DataFrame(
-        {
-            "sample_id": common.values,
-            "x": umap_coords[:, 0],
-            "y": umap_coords[:, 1],
-            "cluster": y,
-        }
-    )
-    umap_df.to_csv(args.out_umap_tsv, sep="\t", index=False)
-    plot_scatter(umap_df, "x", "y", args.out_umap_png, "UMAP embedding")
-
-    perp = safe_perplexity(len(common), args.tsne_perplexity)
-    tsne_coords = compute_tsne(x, perp, args.tsne_iter)
-    tsne_df = pd.DataFrame(
-        {
-            "sample_id": common.values,
-            "x": tsne_coords[:, 0],
-            "y": tsne_coords[:, 1],
-            "cluster": y,
-        }
-    )
-    tsne_df.to_csv(args.out_tsne_tsv, sep="\t", index=False)
-    plot_scatter(tsne_df, "x", "y", args.out_tsne_png, f"t-SNE (perplexity={perp:.1f})")
-
-
-
-if __name__ == "__main__":
-    import sys
-    import platform
-
-    prefix = "${task.ext.prefix ?: meta.id}"
-
-    sys.argv = [
-        "cluster_viz.py",
-        "--features", "$features",
-        "--clusters", "$clusters",
-        "--pca-scores", "$pca_scores",
-        "--out-umap-tsv", f"{prefix}_umap.tsv",
-        "--out-tsne-tsv", f"{prefix}_tsne.tsv",
-        "--out-umap-png", f"{prefix}_umap.png",
-        "--out-tsne-png", f"{prefix}_tsne.png",
-        "--out-pca-png", f"{prefix}_pca.png",
-    ]
-
-    main()
-
-    import matplotlib
-    import pandas
-    import sklearn
-
-    try:
-        import umap
-        umap_version = umap.__version__
-    except Exception:
-        umap_version = "N/A"
-
-    with open("versions.yml", "w") as f:
-        f.write(
-            f'"${task.process}":\n'
-            f'    python: {platform.python_version()}\n'
-            f'    pandas: {pandas.__version__}\n'
-            f'    scikit-learn: {sklearn.__version__}\n'
-            f'    umap-learn: {umap_version}\n'
-            f'    matplotlib: {matplotlib.__version__}\n'
-        )
diff --git a/modules/nf-core/custom/clustervisualiation/tests/main.nf.test b/modules/nf-core/custom/clustervisualiation/tests/main.nf.test
deleted file mode 100644
index 43c55045ae1e..000000000000
--- a/modules/nf-core/custom/clustervisualiation/tests/main.nf.test
+++ /dev/null
@@ -1,54 +0,0 @@
-nextflow_process {
-    name "Test Process CUSTOM_CLUSTERVISUALIATION"
-    script "../main.nf"
-    process "CUSTOM_CLUSTERVISUALIATION"
-    tag "modules"
-    tag "modules_nfcore"
-    tag "custom"
-    tag "custom/clustervisualiation"
-    tag "clustervisualiation"
-
-    test("clustervisualiation - features clusters pca") {
-        when {
-            process {
-                """
-                input[0] = [ [id:'test'],
-                    file("${projectDir}/modules/nf-core/custom/clustervisualiation/tests/data/test_features.tsv", checkIfExists: true),
-                    file("${projectDir}/modules/nf-core/custom/clustervisualiation/tests/data/test_clusters.csv", checkIfExists: true),
-                    file("${projectDir}/modules/nf-core/custom/clustervisualiation/tests/data/test_pca.eigenvec", checkIfExists: true) ]
-                input[1] = 'test'
-                """
-            }
-        }
-        then {
-            assert process.success
-            assert snapshot(
-                process.out.umap,
-                process.out.tsne,
-                process.out.umap_png,
-                process.out.tsne_png,
-                process.out.pca_png,
-                process.out.versions
-            ).match()
-        }
-    }
-
-    test("clustervisualiation - features clusters pca - stub") {
-        options "-stub"
-        when {
-            process {
-                """
-                input[0] = [ [id:'test'],
-                    file("${projectDir}/modules/nf-core/custom/clustervisualiation/tests/data/test_features.tsv", checkIfExists: true),
-                    file("${projectDir}/modules/nf-core/custom/clustervisualiation/tests/data/test_clusters.csv", checkIfExists: true),
-                    file("${projectDir}/modules/nf-core/custom/clustervisualiation/tests/data/test_pca.eigenvec", checkIfExists: true) ]
-                input[1] = 'test'
-                """
-            }
-        }
-        then {
-            assert process.success
-            assert snapshot(process.out).match()
-        }
-    }
-}
diff --git a/modules/nf-core/custom/clustervisualiation/tests/main.nf.test.snap b/modules/nf-core/custom/clustervisualiation/tests/main.nf.test.snap
deleted file mode 100644
index bc8ca92380e9..000000000000
--- a/modules/nf-core/custom/clustervisualiation/tests/main.nf.test.snap
+++ /dev/null
@@ -1,151 +0,0 @@
-{
-    "clustervisualiation - features clusters pca": {
-        "content": [
-            [
-                [
-                    {
-                        "id": "test"
-                    },
-                    "test_umap.tsv:md5,feda69a2108d84b70c3937a1cb84e661"
-                ]
-            ],
-            [
-                [
-                    {
-                        "id": "test"
-                    },
-                    "test_tsne.tsv:md5,fc79b712722096f127cdb80c269380fb"
-                ]
-            ],
-            [
-                [
-                    {
-                        "id": "test"
-                    },
-                    "test_umap.png:md5,470fab262187541b0ad52a3138bf3734"
-                ]
-            ],
-            [
-                [
-                    {
-                        "id": "test"
-                    },
-                    "test_tsne.png:md5,aeacfdc6d9e35dd27406e70ae9220715"
-                ]
-            ],
-            [
-                [
-                    {
-                        "id": "test"
-                    },
-                    "test_pca.png:md5,a659f260b13e0351dd28cbb5163d6a20"
-                ]
-            ],
-            [
-                "versions.yml:md5,786af5c4301c54553001db08a8c9db5b"
-            ]
-        ],
-        "timestamp": "2026-05-06T17:29:34.854148226",
-        "meta": {
-            "nf-test": "0.9.5",
-            "nextflow": "25.09.0"
-        }
-    },
-    "clustervisualiation - features clusters pca - stub": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test_umap.tsv:md5,feda69a2108d84b70c3937a1cb84e661"
-                    ]
-                ],
-                "1": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test_tsne.tsv:md5,fc79b712722096f127cdb80c269380fb"
-                    ]
-                ],
-                "2": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test_umap.png:md5,470fab262187541b0ad52a3138bf3734"
-                    ]
-                ],
-                "3": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test_tsne.png:md5,aeacfdc6d9e35dd27406e70ae9220715"
-                    ]
-                ],
-                "4": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test_pca.png:md5,a659f260b13e0351dd28cbb5163d6a20"
-                    ]
-                ],
-                "5": [
-                    "versions.yml:md5,786af5c4301c54553001db08a8c9db5b"
-                ],
-                "pca_png": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test_pca.png:md5,a659f260b13e0351dd28cbb5163d6a20"
-                    ]
-                ],
-                "tsne": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test_tsne.tsv:md5,fc79b712722096f127cdb80c269380fb"
-                    ]
-                ],
-                "tsne_png": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test_tsne.png:md5,aeacfdc6d9e35dd27406e70ae9220715"
-                    ]
-                ],
-                "umap": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test_umap.tsv:md5,feda69a2108d84b70c3937a1cb84e661"
-                    ]
-                ],
-                "umap_png": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test_umap.png:md5,470fab262187541b0ad52a3138bf3734"
-                    ]
-                ],
-                "versions": [
-                    "versions.yml:md5,786af5c4301c54553001db08a8c9db5b"
-                ]
-            }
-        ],
-        "timestamp": "2026-05-06T17:29:46.705755643",
-        "meta": {
-            "nf-test": "0.9.5",
-            "nextflow": "25.09.0"
-        }
-    }
-}
\ No newline at end of file
diff --git a/modules/nf-core/custom/clustervisualiation/environment.yml b/modules/nf-core/custom/clustervisualization/environment.yml
similarity index 100%
rename from modules/nf-core/custom/clustervisualiation/environment.yml
rename to modules/nf-core/custom/clustervisualization/environment.yml
diff --git a/modules/nf-core/custom/clustervisualization/main.nf b/modules/nf-core/custom/clustervisualization/main.nf
new file mode 100644
index 000000000000..4c4949fb60d6
--- /dev/null
+++ b/modules/nf-core/custom/clustervisualization/main.nf
@@ -0,0 +1,43 @@
+process CUSTOM_CLUSTERVISUALIZATION {
+    tag "$meta.id"
+    label 'process_medium'
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c9/c993602a6f49b387b34e84f41fda5a393355850b2dd6ab776f1307a0e7b9d540/data' :
+        'community.wave.seqera.io/library/matplotlib_pandas_python_scikit-learn_seaborn_umap-learn:c378d29780adbcbf' }"
+
+    input:
+    tuple val(meta), path(features), path(clusters)
+
+    output:
+    tuple val(meta), path("*.umap.tsv") , emit: umap_tsv
+    tuple val(meta), path("*.tsne.tsv") , emit: tsne_tsv
+    tuple val(meta), path("*.umap.png") , emit: umap_png, optional: true
+    tuple val(meta), path("*.tsne.png") , emit: tsne_png, optional: true
+    path "versions.yml"                 , emit: versions, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    template 'cluster_viz.py'
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.umap.tsv
+    touch ${prefix}.tsne.tsv
+    touch ${prefix}.umap.png
+    touch ${prefix}.tsne.png
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python3 --version | sed 's/Python //')
+        pandas: \$(python3 -c "import pandas; print(pandas.__version__)")
+        matplotlib: \$(python3 -c "import matplotlib; print(matplotlib.__version__)")
+        seaborn: \$(python3 -c "import seaborn; print(seaborn.__version__)")
+        umap-learn: \$(python3 -c "import umap; print(umap.__version__)")
+        scikit-learn: \$(python3 -c "import sklearn; print(sklearn.__version__)")
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/custom/clustervisualiation/meta.yml b/modules/nf-core/custom/clustervisualization/meta.yml
similarity index 98%
rename from modules/nf-core/custom/clustervisualiation/meta.yml
rename to modules/nf-core/custom/clustervisualization/meta.yml
index ebde48a4bbef..a7f02374f268 100644
--- a/modules/nf-core/custom/clustervisualiation/meta.yml
+++ b/modules/nf-core/custom/clustervisualization/meta.yml
@@ -1,4 +1,4 @@
-name: "CUSTOM_CLUSTERVISUALIATION"
+name: "CUSTOM_CLUSTERVISUALIZATION"
 description: "Generates PCA, UMAP and t-SNE visualizations colored by cluster"
 keywords:
   - clustering
diff --git a/modules/nf-core/custom/clustervisualization/templates/cluster_viz.py b/modules/nf-core/custom/clustervisualization/templates/cluster_viz.py
new file mode 100644
index 000000000000..c67391521f6c
--- /dev/null
+++ b/modules/nf-core/custom/clustervisualization/templates/cluster_viz.py
@@ -0,0 +1,229 @@
+#!/usr/bin/env python3
+
+import platform
+import sys
+
+import matplotlib
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import seaborn as sns
+import sklearn
+from sklearn.manifold import TSNE
+from umap import UMAP
+import umap as umap_module
+
+matplotlib.use("Agg")
+
+
+def format_yaml_like(data: dict, indent: int = 0) -> str:
+    """Formats a dictionary to a YAML-like string (nf-core standard)."""
+    yaml_str = ""
+    for key, value in data.items():
+        spaces = "  " * indent
+        if isinstance(value, dict):
+            yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}"
+        else:
+            yaml_str += f"{spaces}{key}: {value}\\n"
+    return yaml_str
+
+
+def _normalise_id_column(df: pd.DataFrame) -> pd.DataFrame:
+    df = df.copy()
+    df.columns = [str(c).lstrip("#") for c in df.columns]
+
+    cols_upper = {str(c).upper(): c for c in df.columns}
+
+    if "IID" in cols_upper:
+        iid_col = cols_upper["IID"]
+        dup_mask = df[iid_col].astype(str).str.upper().isin({"FID", "IID"})
+        if dup_mask.any():
+            df = df.loc[~dup_mask].copy().reset_index(drop=True)
+
+    cols_upper = {str(c).upper(): c for c in df.columns}
+
+    if "SAMPLE_ID" in cols_upper:
+        sample_col = cols_upper["SAMPLE_ID"]
+        if sample_col != "sample_id":
+            df = df.rename(columns={sample_col: "sample_id"})
+        return df
+
+    if "IID" in cols_upper:
+        iid_col = cols_upper["IID"]
+        iid_numeric = pd.to_numeric(df[iid_col], errors="coerce").notna().all()
+
+        if iid_numeric:
+            df = df.drop(columns=[iid_col])
+            if len(df.columns) == 0:
+                raise ValueError("Cannot infer sample_id after dropping numeric IID column")
+            df = df.rename(columns={df.columns[0]: "sample_id"})
+        else:
+            df = df.rename(columns={iid_col: "sample_id"})
+
+        fid_cols = [c for c in df.columns if str(c).upper() == "FID"]
+        if fid_cols:
+            df = df.drop(columns=fid_cols)
+
+        return df
+
+    raise ValueError(f"Cannot find sample ID column (expected 'sample_id' or 'IID'). Found: {list(df.columns)}")
+
+
+def load_features(path: str) -> tuple[pd.DataFrame, pd.Series]:
+    df = pd.read_csv(path, sep="\\t", dtype=str)
+    df = _normalise_id_column(df)
+
+    if "sample_id" not in df.columns:
+        raise ValueError("features file must contain a sample_id column after normalization")
+
+    sample_ids = df["sample_id"].astype(str)
+    x = df.drop(columns=["sample_id"]).apply(pd.to_numeric, errors="coerce")
+    x = x.fillna(x.mean(numeric_only=True))
+    x = x.fillna(0.0)
+
+    return x, sample_ids
+
+
+def load_clusters(path: str) -> tuple[pd.DataFrame, str]:
+    """Load clusters and return (df, mode). Same logic as cluster_metrics."""
+    df = pd.read_csv(path, sep=",", dtype=str)
+    df = df.copy()
+    df.columns = [str(c).lstrip("#") for c in df.columns]
+
+    cols_upper = {str(c).upper(): c for c in df.columns}
+
+    if "CLUSTER" not in cols_upper:
+        raise ValueError("clusters CSV must have a 'cluster' column")
+
+    cluster_col = cols_upper["CLUSTER"]
+
+    if "SAMPLE_ID" in cols_upper:
+        sample_col = cols_upper["SAMPLE_ID"]
+        out = df[[sample_col, cluster_col]].copy()
+        out.columns = ["sample_id", "cluster"]
+        out["sample_id"] = out["sample_id"].astype(str)
+        out["cluster"] = pd.to_numeric(out["cluster"], errors="raise").astype(int)
+        return out, "sample_id"
+
+    try:
+        norm = _normalise_id_column(df.copy())
+        if "sample_id" in norm.columns and "cluster" in norm.columns:
+            out = norm[["sample_id", "cluster"]].copy()
+            out["sample_id"] = out["sample_id"].astype(str)
+            out["cluster"] = pd.to_numeric(out["cluster"], errors="raise").astype(int)
+            return out, "sample_id"
+    except Exception:
+        pass
+
+    other_cols = [c for c in df.columns if c != cluster_col]
+
+    if len(other_cols) == 1:
+        candidate = other_cols[0]
+        candidate_vals = df[candidate].astype(str)
+
+        if not (
+            len(candidate_vals) > 0 and float(pd.to_numeric(candidate_vals, errors="coerce").notna().mean()) >= 0.8
+        ):
+            out = pd.DataFrame(
+                {
+                    "sample_id": candidate_vals,
+                    "cluster": pd.to_numeric(df[cluster_col], errors="raise").astype(int),
+                }
+            )
+            return out, "sample_id"
+
+    out = pd.DataFrame({"cluster": pd.to_numeric(df[cluster_col], errors="raise").astype(int)})
+    return out, "row_order"
+
+
+def plot_embedding(x: np.ndarray, labels: np.ndarray, method: str, prefix: str) -> None:
+    """Plot UMAP or t-SNE with cluster coloring."""
+    if method == "umap":
+        reducer = UMAP(random_state=42)
+        embedding = reducer.fit_transform(x)
+        title = "UMAP"
+        out_tsv = f"{prefix}.umap.tsv"
+        out_png = f"{prefix}.umap.png"
+    else:  # tsne
+        reducer = TSNE(n_components=2, random_state=42, perplexity=min(30, len(x) - 1))
+        embedding = reducer.fit_transform(x)
+        title = "t-SNE"
+        out_tsv = f"{prefix}.tsne.tsv"
+        out_png = f"{prefix}.tsne.png"
+
+    # Save embedding
+    emb_df = pd.DataFrame(embedding, columns=["Dim1", "Dim2"])
+    emb_df["cluster"] = labels
+    emb_df.to_csv(out_tsv, sep="\\t", index=False)
+
+    # Plot
+    plt.figure(figsize=(8, 6))
+    palette = sns.color_palette("tab10", n_colors=len(np.unique(labels)))
+    sns.scatterplot(
+        x=embedding[:, 0],
+        y=embedding[:, 1],
+        hue=labels.astype(str),
+        palette=palette,
+        alpha=0.8,
+        s=60,
+        edgecolor="k",
+        linewidth=0.3,
+    )
+    plt.title(f"{title} projection of features colored by cluster")
+    plt.xlabel(f"{title} 1")
+    plt.ylabel(f"{title} 2")
+    plt.legend(title="Cluster", bbox_to_anchor=(1.05, 1), loc="upper left")
+    plt.tight_layout()
+    plt.savefig(out_png, dpi=200, bbox_inches="tight")
+    plt.close()
+
+
+def main() -> None:
+    features = "$features"
+    clusters_path = "$clusters"
+    prefix = "${task.ext.prefix ?: meta.id}"
+
+    x_df, sample_ids = load_features(features)
+    clusters_df, cluster_mode = load_clusters(clusters_path)
+
+    if cluster_mode == "sample_id":
+        clusters = clusters_df.set_index("sample_id")["cluster"]
+        common = sample_ids[sample_ids.isin(clusters.index)]
+        if len(common) > 0:
+            x = x_df.loc[common.index].values
+            labels = clusters.loc[common.values].values
+        elif len(clusters_df) == len(sample_ids):
+            x = x_df.values
+            labels = clusters_df["cluster"].values
+        else:
+            raise ValueError("No overlapping sample_id between features and clusters")
+    else:
+        if len(clusters_df) != len(sample_ids):
+            raise ValueError("Row counts do not match and no sample_id column found")
+        x = x_df.values
+        labels = clusters_df["cluster"].values
+
+    if len(x) < 2:
+        raise ValueError("Need at least 2 samples for embedding")
+
+    # Generate both embeddings
+    plot_embedding(x, labels, "umap", prefix)
+    plot_embedding(x, labels, "tsne", prefix)
+
+    # versions.yml
+    versions = {
+        "${task.process}": {
+            "python": platform.python_version(),
+            "pandas": pd.__version__,
+            "matplotlib": matplotlib.__version__,
+            "seaborn": sns.__version__,
+            "umap-learn": umap_module.__version__,
+            "scikit-learn": sklearn.__version__,
+        }
+    }
+    with open("versions.yml", "w") as f:
+        f.write(format_yaml_like(versions))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/modules/nf-core/custom/clustervisualiation/tests/data/test_clusters.csv b/modules/nf-core/custom/clustervisualization/tests/data/test_clusters.csv
similarity index 100%
rename from modules/nf-core/custom/clustervisualiation/tests/data/test_clusters.csv
rename to modules/nf-core/custom/clustervisualization/tests/data/test_clusters.csv
diff --git a/modules/nf-core/custom/clustervisualiation/tests/data/test_features.tsv b/modules/nf-core/custom/clustervisualization/tests/data/test_features.tsv
similarity index 100%
rename from modules/nf-core/custom/clustervisualiation/tests/data/test_features.tsv
rename to modules/nf-core/custom/clustervisualization/tests/data/test_features.tsv
diff --git a/modules/nf-core/custom/clustervisualiation/tests/data/test_pca.eigenvec b/modules/nf-core/custom/clustervisualization/tests/data/test_pca.eigenvec
similarity index 100%
rename from modules/nf-core/custom/clustervisualiation/tests/data/test_pca.eigenvec
rename to modules/nf-core/custom/clustervisualization/tests/data/test_pca.eigenvec
diff --git a/modules/nf-core/custom/clustervisualization/tests/main.nf.test b/modules/nf-core/custom/clustervisualization/tests/main.nf.test
new file mode 100644
index 000000000000..76e03b14c860
--- /dev/null
+++ b/modules/nf-core/custom/clustervisualization/tests/main.nf.test
@@ -0,0 +1,47 @@
+nextflow_process {
+    name "Test Process CUSTOM_CLUSTERVISUALIZATION"
+    script "../main.nf"
+    process "CUSTOM_CLUSTERVISUALIZATION"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "custom"
+    tag "custom/clustervisualization"
+    tag "clustervisualization"
+
+    test("clustervisualization - features and clusters") {
+        when {
+            process {
+                """
+                input[0] = [ [id:'test'],
+                    file("${projectDir}/modules/nf-core/custom/clustervisualization/tests/data/test_features.tsv", checkIfExists: true),
+                    file("${projectDir}/modules/nf-core/custom/clustervisualization/tests/data/test_clusters.csv", checkIfExists: true) ]
+                """
+            }
+        }
+        then {
+            assert process.success
+            assert snapshot(
+                process.out.umap_tsv,
+                process.out.tsne_tsv,
+                process.out.versions
+            ).match()
+        }
+    }
+
+    test("clustervisualization - features and clusters - stub") {
+        options "-stub"
+        when {
+            process {
+                """
+                input[0] = [ [id:'test'],
+                    file("${projectDir}/modules/nf-core/custom/clustervisualization/tests/data/test_features.tsv", checkIfExists: true),
+                    file("${projectDir}/modules/nf-core/custom/clustervisualization/tests/data/test_clusters.csv", checkIfExists: true) ]
+                """
+            }
+        }
+        then {
+            assert process.success
+            assert snapshot(process.out).match()
+        }
+    }
+}
diff --git a/modules/nf-core/custom/clustervisualization/tests/main.nf.test.snap b/modules/nf-core/custom/clustervisualization/tests/main.nf.test.snap
new file mode 100644
index 000000000000..610912e901ea
--- /dev/null
+++ b/modules/nf-core/custom/clustervisualization/tests/main.nf.test.snap
@@ -0,0 +1,111 @@
+{
+    "clustervisualization - features and clusters - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.umap.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.tsne.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "2": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.umap.png:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "3": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.tsne.png:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "4": [
+                    "versions.yml:md5,a50bc5907c3a28f12238b5497e3f4c67"
+                ],
+                "tsne_png": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.tsne.png:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "tsne_tsv": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.tsne.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "umap_png": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.umap.png:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "umap_tsv": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.umap.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,a50bc5907c3a28f12238b5497e3f4c67"
+                ]
+            }
+        ],
+        "timestamp": "2026-05-11T14:15:13.987219333",
+        "meta": {
+            "nf-test": "0.9.5",
+            "nextflow": "25.09.0"
+        }
+    },
+    "clustervisualization - features and clusters": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.umap.tsv:md5,2cba3fa6ba2d3ce80ad884b4210403eb"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.tsne.tsv:md5,738a97587fa8c72614d2655eddbd2f7a"
+                ]
+            ],
+            [
+                "versions.yml:md5,43b533ced227b510ca833d01881efc8e"
+            ]
+        ],
+        "timestamp": "2026-05-11T14:15:07.547048716",
+        "meta": {
+            "nf-test": "0.9.5",
+            "nextflow": "25.09.0"
+        }
+    }
+}
\ No newline at end of file

From 4ed380b14761a7dd2f516ba5ac7e4830b9331ec5 Mon Sep 17 00:00:00 2001
From: dbaku42 <dbaku42@gmail.com>
Date: Mon, 11 May 2026 16:36:40 +0200
Subject: [PATCH 21/38] feat(custom/clustervisualization): add UMAP and t-SNE
 cluster visualization module

---
 .../clustervisualization/environment.yml      |  1 +
 .../custom/clustervisualization/main.nf       |  4 +-
 .../custom/clustervisualization/meta.yml      | 71 ++++++++-----------
 3 files changed, 31 insertions(+), 45 deletions(-)

diff --git a/modules/nf-core/custom/clustervisualization/environment.yml b/modules/nf-core/custom/clustervisualization/environment.yml
index 803fb67fb108..b68e2333b2f2 100644
--- a/modules/nf-core/custom/clustervisualization/environment.yml
+++ b/modules/nf-core/custom/clustervisualization/environment.yml
@@ -8,4 +8,5 @@ dependencies:
   - pandas=2.2.*
   - python=3.12
   - scikit-learn=1.5.*
+  - seaborn=0.13.*
   - umap-learn=0.5.*
diff --git a/modules/nf-core/custom/clustervisualization/main.nf b/modules/nf-core/custom/clustervisualization/main.nf
index 4c4949fb60d6..9120f2e18f2f 100644
--- a/modules/nf-core/custom/clustervisualization/main.nf
+++ b/modules/nf-core/custom/clustervisualization/main.nf
@@ -2,9 +2,7 @@ process CUSTOM_CLUSTERVISUALIZATION {
     tag "$meta.id"
     label 'process_medium'
     conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
-        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c9/c993602a6f49b387b34e84f41fda5a393355850b2dd6ab776f1307a0e7b9d540/data' :
-        'community.wave.seqera.io/library/matplotlib_pandas_python_scikit-learn_seaborn_umap-learn:c378d29780adbcbf' }"
+    container "community.wave.seqera.io/library/matplotlib_pandas_python_scikit-learn_pruned:9579c043ac096a34"
 
     input:
     tuple val(meta), path(features), path(clusters)
diff --git a/modules/nf-core/custom/clustervisualization/meta.yml b/modules/nf-core/custom/clustervisualization/meta.yml
index a7f02374f268..0d90ab6fa53d 100644
--- a/modules/nf-core/custom/clustervisualization/meta.yml
+++ b/modules/nf-core/custom/clustervisualization/meta.yml
@@ -1,5 +1,5 @@
 name: "CUSTOM_CLUSTERVISUALIZATION"
-description: "Generates PCA, UMAP and t-SNE visualizations colored by cluster"
+description: "Generates UMAP and t-SNE visualizations colored by cluster"
 keywords:
   - clustering
   - visualization
@@ -8,14 +8,14 @@ keywords:
   - tsne
   - dimension-reduction
 tools:
-  - "scikit-learn":
+  - scikit-learn:
       description: "Machine learning library for dimension reduction (PCA, t-SNE)"
       homepage: "https://scikit-learn.org/"
       documentation: "https://scikit-learn.org/stable/modules/clustering.html"
       licence:
         - "BSD-3-Clause"
       identifier: ""
-  - "umap-learn":
+  - umap-learn:
       description: "Uniform Manifold Approximation and Projection for dimension reduction"
       homepage: "https://umap-learn.readthedocs.io/"
       documentation: "https://umap-learn.readthedocs.io/en/latest/"
@@ -25,76 +25,63 @@ tools:
 input:
   - - meta:
         type: map
-        description: Groovy Map containing sample information
+        description: "Groovy Map containing sample information"
     - features:
         type: file
-        description: TSV file with sample_id and numeric features
+        description: "TSV file with sample_id and numeric features"
         pattern: "*.tsv"
         ontologies:
-          - edam: http://edamontology.org/format_3475
+          - edam: "http://edamontology.org/format_3475"
     - clusters:
         type: file
-        description: CSV/TSV file with sample_id and cluster assignment
+        description: "CSV/TSV file with sample_id and cluster assignment"
         pattern: "*_clusters.*"
         ontologies: []
-    - pca_scores:
-        type: file
-        description: TSV file with PCA scores from previous step
-        pattern: "*_pca_scores.tsv"
-        ontologies:
-          - edam: http://edamontology.org/format_3475
 output:
-  umap:
+  umap_tsv:
     - - meta:
           type: map
-          description: Groovy Map containing sample information
-      - "*_umap.tsv":
+          description: "Groovy Map containing sample information"
+      - "*.umap.tsv":
           type: file
-          description: UMAP coordinates TSV file
-          pattern: "*_umap.tsv"
+          description: "UMAP coordinates per sample"
+          pattern: "*.umap.tsv"
           ontologies:
+            - edam: "http://edamontology.org/operation_2432"
             - edam: http://edamontology.org/format_3475
-  tsne:
+  tsne_tsv:
     - - meta:
           type: map
-          description: Groovy Map containing sample information
-      - "*_tsne.tsv":
+          description: "Groovy Map containing sample information"
+      - "*.tsne.tsv":
           type: file
-          description: t-SNE coordinates TSV file
-          pattern: "*_tsne.tsv"
+          description: "t-SNE coordinates per sample"
+          pattern: "*.tsne.tsv"
           ontologies:
+            - edam: "http://edamontology.org/operation_2432"
             - edam: http://edamontology.org/format_3475
   umap_png:
     - - meta:
           type: map
-          description: Groovy Map containing sample information
-      - "*_umap.png":
+          description: "Groovy Map containing sample information"
+      - "*.umap.png":
           type: file
-          description: UMAP visualization plot
-          pattern: "*_umap.png"
+          description: "UMAP visualization coloured by cluster"
+          pattern: "*.umap.png"
           ontologies: []
   tsne_png:
     - - meta:
           type: map
-          description: Groovy Map containing sample information
-      - "*_tsne.png":
+          description: "Groovy Map containing sample information"
+      - "*.tsne.png":
           type: file
-          description: t-SNE visualization plot
-          pattern: "*_tsne.png"
-          ontologies: []
-  pca_png:
-    - - meta:
-          type: map
-          description: Groovy Map containing sample information
-      - "*_pca.png":
-          type: file
-          description: PCA visualization plot
-          pattern: "*_pca.png"
+          description: "t-SNE visualization coloured by cluster"
+          pattern: "*.tsne.png"
           ontologies: []
   versions:
-    - "versions.yml":
+    - versions.yml:
         type: file
-        description: File containing software versions
+        description: "Software versions used in the module"
         pattern: "versions.yml"
         ontologies:
           - edam: http://edamontology.org/format_3750

From 4fe91e77088c215e325671c6c66991f9404ab512 Mon Sep 17 00:00:00 2001
From: dbaku42 <dbaku42@gmail.com>
Date: Mon, 11 May 2026 16:56:11 +0200
Subject: [PATCH 22/38] fix: apply ruff formatting to cluster_viz.py template

---
 .../custom/clustervisualization/templates/cluster_viz.py       | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/modules/nf-core/custom/clustervisualization/templates/cluster_viz.py b/modules/nf-core/custom/clustervisualization/templates/cluster_viz.py
index c67391521f6c..f06f76271a30 100644
--- a/modules/nf-core/custom/clustervisualization/templates/cluster_viz.py
+++ b/modules/nf-core/custom/clustervisualization/templates/cluster_viz.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python3
 
 import platform
-import sys
 
 import matplotlib
 import matplotlib.pyplot as plt
@@ -9,9 +8,9 @@
 import pandas as pd
 import seaborn as sns
 import sklearn
+import umap as umap_module
 from sklearn.manifold import TSNE
 from umap import UMAP
-import umap as umap_module
 
 matplotlib.use("Agg")
 

From 09802089e5814fcbb7cb93caebacc8397c4b7625 Mon Sep 17 00:00:00 2001
From: dbaku42 <dbaku42@gmail.com>
Date: Mon, 11 May 2026 20:09:26 +0200
Subject: [PATCH 23/38] fix: align clustermetrics and clustervisualization envs
 and containers

---
 modules/nf-core/custom/clustermetrics/environment.yml | 11 +++++++----
 modules/nf-core/custom/clustermetrics/main.nf         |  5 ++---
 .../custom/clustervisualization/environment.yml       |  2 +-
 modules/nf-core/custom/clustervisualization/main.nf   |  5 +++--
 4 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/modules/nf-core/custom/clustermetrics/environment.yml b/modules/nf-core/custom/clustermetrics/environment.yml
index 616821c92ff9..f4cde46c06ab 100644
--- a/modules/nf-core/custom/clustermetrics/environment.yml
+++ b/modules/nf-core/custom/clustermetrics/environment.yml
@@ -4,7 +4,10 @@ channels:
   - conda-forge
   - bioconda
 dependencies:
-  - matplotlib=3.9.*
-  - pandas=2.2.*
-  - python=3.12
-  - scikit-learn=1.5.*
+  - conda-forge::matplotlib=3.9.4
+  - conda-forge::numpy=2.4.2
+  - conda-forge::pandas=2.3.2
+  - conda-forge::python=3.12.12
+  - conda-forge::scikit-learn=1.8.0
+  - conda-forge::seaborn=0.13.2
+  - conda-forge::umap-learn=0.5.12
diff --git a/modules/nf-core/custom/clustermetrics/main.nf b/modules/nf-core/custom/clustermetrics/main.nf
index e41e695f87f3..fc9678950dfa 100644
--- a/modules/nf-core/custom/clustermetrics/main.nf
+++ b/modules/nf-core/custom/clustermetrics/main.nf
@@ -3,9 +3,8 @@ process CUSTOM_CLUSTERMETRICS {
     label 'process_medium'
     conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
-    'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c9/c993602a6f49b387b34e84f41fda5a393355850b2dd6ab776f1307a0e7b9d540/data' :
-    'community.wave.seqera.io/library/matplotlib_pandas_python_scikit-learn:c378d29780adbcbf' }"
-
+      'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/92/92185ca35119bf65d5f45eb57c71dc489edcd209056c32162fc6045fceda6dd6/data' :
+      'community.wave.seqera.io/library/matplotlib_numpy_pandas_python_pruned:6b81abc92579656a' }"
     input:
     tuple val(meta), path(features), path(clusters)
 
diff --git a/modules/nf-core/custom/clustervisualization/environment.yml b/modules/nf-core/custom/clustervisualization/environment.yml
index b68e2333b2f2..c2cf95a372d2 100644
--- a/modules/nf-core/custom/clustervisualization/environment.yml
+++ b/modules/nf-core/custom/clustervisualization/environment.yml
@@ -7,6 +7,6 @@ dependencies:
   - matplotlib=3.9.*
   - pandas=2.2.*
   - python=3.12
-  - scikit-learn=1.5.*
+  - scikit-learn=1.6.*
   - seaborn=0.13.*
   - umap-learn=0.5.*
diff --git a/modules/nf-core/custom/clustervisualization/main.nf b/modules/nf-core/custom/clustervisualization/main.nf
index 9120f2e18f2f..ed8ab229bf6e 100644
--- a/modules/nf-core/custom/clustervisualization/main.nf
+++ b/modules/nf-core/custom/clustervisualization/main.nf
@@ -2,8 +2,9 @@ process CUSTOM_CLUSTERVISUALIZATION {
     tag "$meta.id"
     label 'process_medium'
     conda "${moduleDir}/environment.yml"
-    container "community.wave.seqera.io/library/matplotlib_pandas_python_scikit-learn_pruned:9579c043ac096a34"
-
+    container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
+      'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/92/92185ca35119bf65d5f45eb57c71dc489edcd209056c32162fc6045fceda6dd6/data' :
+      'community.wave.seqera.io/library/matplotlib_numpy_pandas_python_pruned:6b81abc92579656a' }"
     input:
     tuple val(meta), path(features), path(clusters)
 

From f240623c4b597e7efc3b9d7d8fe4410c217bf0bb Mon Sep 17 00:00:00 2001
From: dbaku42 <dbaku42@gmail.com>
Date: Tue, 12 May 2026 13:19:57 +0200
Subject: [PATCH 24/38] fix: use docker:// prefix for singularity container to
 enable OCI conversion

---
 modules/nf-core/custom/clustermetrics/main.nf       | 4 ++--
 modules/nf-core/custom/clustervisualization/main.nf | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/modules/nf-core/custom/clustermetrics/main.nf b/modules/nf-core/custom/clustermetrics/main.nf
index fc9678950dfa..67802950c6c2 100644
--- a/modules/nf-core/custom/clustermetrics/main.nf
+++ b/modules/nf-core/custom/clustermetrics/main.nf
@@ -3,8 +3,8 @@ process CUSTOM_CLUSTERMETRICS {
     label 'process_medium'
     conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
-      'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/92/92185ca35119bf65d5f45eb57c71dc489edcd209056c32162fc6045fceda6dd6/data' :
-      'community.wave.seqera.io/library/matplotlib_numpy_pandas_python_pruned:6b81abc92579656a' }"
+    'docker://community.wave.seqera.io/library/matplotlib_numpy_pandas_python_pruned:6b81abc92579656a' :
+    'community.wave.seqera.io/library/matplotlib_numpy_pandas_python_pruned:6b81abc92579656a' }"
     input:
     tuple val(meta), path(features), path(clusters)
 
diff --git a/modules/nf-core/custom/clustervisualization/main.nf b/modules/nf-core/custom/clustervisualization/main.nf
index ed8ab229bf6e..a96b0e342c6b 100644
--- a/modules/nf-core/custom/clustervisualization/main.nf
+++ b/modules/nf-core/custom/clustervisualization/main.nf
@@ -3,8 +3,8 @@ process CUSTOM_CLUSTERVISUALIZATION {
     label 'process_medium'
     conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
-      'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/92/92185ca35119bf65d5f45eb57c71dc489edcd209056c32162fc6045fceda6dd6/data' :
-      'community.wave.seqera.io/library/matplotlib_numpy_pandas_python_pruned:6b81abc92579656a' }"
+    'docker://community.wave.seqera.io/library/matplotlib_numpy_pandas_python_pruned:6b81abc92579656a' :
+    'community.wave.seqera.io/library/matplotlib_numpy_pandas_python_pruned:6b81abc92579656a' }"
     input:
     tuple val(meta), path(features), path(clusters)
 

From c46c26cac99b2f056b1bba84f2fb4e0db3ece604 Mon Sep 17 00:00:00 2001
From: dbaku42 <dbaku42@gmail.com>
Date: Tue, 12 May 2026 14:39:54 +0200
Subject: [PATCH 25/38] fix(custom/clustervisualization): set NUMBA_CACHE_DIR
 and MPLCONFIGDIR to fix numba caching in Singularity

---
 .../custom/clustervisualization/templates/cluster_viz.py     | 5 ++++-
 .../nf-core/custom/clustervisualization/tests/main.nf.test   | 3 +++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/modules/nf-core/custom/clustervisualization/templates/cluster_viz.py b/modules/nf-core/custom/clustervisualization/templates/cluster_viz.py
index f06f76271a30..e28d3d5262f5 100644
--- a/modules/nf-core/custom/clustervisualization/templates/cluster_viz.py
+++ b/modules/nf-core/custom/clustervisualization/templates/cluster_viz.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 
 import platform
-
+import os
 import matplotlib
 import matplotlib.pyplot as plt
 import numpy as np
@@ -12,6 +12,9 @@
 from sklearn.manifold import TSNE
 from umap import UMAP
 
+# Fix numba + matplotlib in read-only Singularity container
+os.environ['NUMBA_CACHE_DIR'] = '/tmp'
+os.environ['MPLCONFIGDIR']     = '/tmp'
 matplotlib.use("Agg")
 
 
diff --git a/modules/nf-core/custom/clustervisualization/tests/main.nf.test b/modules/nf-core/custom/clustervisualization/tests/main.nf.test
index 76e03b14c860..bda59809d0b5 100644
--- a/modules/nf-core/custom/clustervisualization/tests/main.nf.test
+++ b/modules/nf-core/custom/clustervisualization/tests/main.nf.test
@@ -10,6 +10,9 @@ nextflow_process {
 
     test("clustervisualization - features and clusters") {
         when {
+            params {
+                nf_test = true
+             }
             process {
                 """
                 input[0] = [ [id:'test'],

From ad39971648d380fa5d29149b000180041b36e7d4 Mon Sep 17 00:00:00 2001
From: dbaku42 <dbaku42@gmail.com>
Date: Tue, 12 May 2026 16:09:31 +0200
Subject: [PATCH 26/38] fix(custom/clustervisualization): move NUMBA_CACHE_DIR
 fix before any imports, and pin KMeans n_init for stable results

---
 .../custom/clustermetrics/templates/cluster_metrics.py   | 2 +-
 .../custom/clustervisualization/templates/cluster_viz.py | 9 +++++----
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py b/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py
index 01236a37ce3f..5be8bac0bfc2 100644
--- a/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py
+++ b/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py
@@ -236,7 +236,7 @@ def main() -> None:
     rows = []
     max_k = min(int(args.k_max), len(x))
     for k in range(int(args.k_min), max_k + 1):
-        model = KMeans(n_clusters=k, n_init="auto", random_state=42)
+        model = KMeans(n_clusters=k, n_init=10, random_state=42)
         y = model.fit_predict(x)
 
         sil = ch = db = None
diff --git a/modules/nf-core/custom/clustervisualization/templates/cluster_viz.py b/modules/nf-core/custom/clustervisualization/templates/cluster_viz.py
index e28d3d5262f5..82e0e527b4f5 100644
--- a/modules/nf-core/custom/clustervisualization/templates/cluster_viz.py
+++ b/modules/nf-core/custom/clustervisualization/templates/cluster_viz.py
@@ -1,7 +1,11 @@
 #!/usr/bin/env python3
 
-import platform
 import os
+# Fix numba + matplotlib in read-only Singularity container
+os.environ['NUMBA_CACHE_DIR'] = '/tmp'
+os.environ['MPLCONFIGDIR']     = '/tmp'
+
+import platform
 import matplotlib
 import matplotlib.pyplot as plt
 import numpy as np
@@ -12,9 +16,6 @@
 from sklearn.manifold import TSNE
 from umap import UMAP
 
-# Fix numba + matplotlib in read-only Singularity container
-os.environ['NUMBA_CACHE_DIR'] = '/tmp'
-os.environ['MPLCONFIGDIR']     = '/tmp'
 matplotlib.use("Agg")
 
 

From c8fec1515d031310a74d63bd9fb314cc2cbe2813 Mon Sep 17 00:00:00 2001
From: Donald Baku <141358602+dbaku42@users.noreply.github.com>
Date: Tue, 12 May 2026 16:34:20 +0200
Subject: [PATCH 27/38] Apply suggestion from @pinin4fjords

Co-authored-by: Jonathan Manning <pininforthefjords@gmail.com>
---
 .../templates/cluster_metrics.py              | 59 ++-----------------
 1 file changed, 5 insertions(+), 54 deletions(-)

diff --git a/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py b/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py
index 5be8bac0bfc2..6feb281e2cca 100644
--- a/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py
+++ b/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py
@@ -88,60 +88,11 @@ def load_features(path: str) -> tuple[pd.DataFrame, pd.Series]:
     return x, sample_ids
 
 
-def _looks_mostly_numeric(s: pd.Series) -> bool:
-    if len(s) == 0:
-        return False
-    parsed = pd.to_numeric(s.astype(str), errors="coerce")
-    return float(parsed.notna().mean()) >= 0.8
-
-
-def load_clusters(path: str) -> tuple[pd.DataFrame, str]:
-    df = pd.read_csv(path, sep=",", dtype=str)
-    df = df.copy()
-    df.columns = [str(c).lstrip("#") for c in df.columns]
-
-    cols_upper = {str(c).upper(): c for c in df.columns}
-
-    if "CLUSTER" not in cols_upper:
-        raise ValueError("clusters CSV must have a 'cluster' column")
-
-    cluster_col = cols_upper["CLUSTER"]
-
-    if "SAMPLE_ID" in cols_upper:
-        sample_col = cols_upper["SAMPLE_ID"]
-        out = df[[sample_col, cluster_col]].copy()
-        out.columns = ["sample_id", "cluster"]
-        out["sample_id"] = out["sample_id"].astype(str)
-        out["cluster"] = pd.to_numeric(out["cluster"], errors="raise").astype(int)
-        return out, "sample_id"
-
-    try:
-        norm = _normalise_id_column(df.copy())
-        if "sample_id" in norm.columns and "cluster" in norm.columns:
-            out = norm[["sample_id", "cluster"]].copy()
-            out["sample_id"] = out["sample_id"].astype(str)
-            out["cluster"] = pd.to_numeric(out["cluster"], errors="raise").astype(int)
-            return out, "sample_id"
-    except Exception:
-        pass
-
-    other_cols = [c for c in df.columns if c != cluster_col]
-
-    if len(other_cols) == 1:
-        candidate = other_cols[0]
-        candidate_vals = df[candidate].astype(str)
-
-        if not _looks_mostly_numeric(candidate_vals):
-            out = pd.DataFrame(
-                {
-                    "sample_id": candidate_vals,
-                    "cluster": pd.to_numeric(df[cluster_col], errors="raise").astype(int),
-                }
-            )
-            return out, "sample_id"
-
-    out = pd.DataFrame({"cluster": pd.to_numeric(df[cluster_col], errors="raise").astype(int)})
-    return out, "row_order"
+def load_clusters(path: str) -> pd.Series:
+    df = pd.read_csv(path)
+    if "sample_id" not in df.columns or "cluster" not in df.columns:
+        raise ValueError(f"clusters file must have 'sample_id' and 'cluster' columns. Found: {list(df.columns)}")
+    return df.set_index(df["sample_id"].astype(str))["cluster"].astype(int)
 
 
 def safe_cluster_metrics(x: np.ndarray, labels: np.ndarray) -> dict:

From 36344663e2a9e555c8a85aea6945f77de8792c87 Mon Sep 17 00:00:00 2001
From: Donald Baku <141358602+dbaku42@users.noreply.github.com>
Date: Tue, 12 May 2026 16:34:49 +0200
Subject: [PATCH 28/38] Apply suggestion from @pinin4fjords

Co-authored-by: Jonathan Manning <pininforthefjords@gmail.com>
---
 .../nf-core/custom/clustermetrics/templates/cluster_metrics.py  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py b/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py
index 6feb281e2cca..f2e0794a58eb 100644
--- a/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py
+++ b/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py
@@ -182,7 +182,7 @@ def main() -> None:
     selected["alignment_mode"] = alignment_mode
 
     metrics_tsv = f"{args.out_prefix}_metrics.tsv"
-    pd.DataFrame([selected]).to_csv(metrics_tsv, sep="\t", index=False)
+    pd.DataFrame([selected]).to_csv(metrics_tsv, sep="\\t", index=False)
 
     rows = []
     max_k = min(int(args.k_max), len(x))

From 6a7ac451916b9f31dbf374dca16f5bf819c8aeac Mon Sep 17 00:00:00 2001
From: dbaku42 <dbaku42@gmail.com>
Date: Tue, 12 May 2026 17:19:30 +0200
Subject: [PATCH 29/38] Prek and script fix

---
 .../custom/clustervisualization/templates/cluster_viz.py    | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/modules/nf-core/custom/clustervisualization/templates/cluster_viz.py b/modules/nf-core/custom/clustervisualization/templates/cluster_viz.py
index 82e0e527b4f5..c571d16ac884 100644
--- a/modules/nf-core/custom/clustervisualization/templates/cluster_viz.py
+++ b/modules/nf-core/custom/clustervisualization/templates/cluster_viz.py
@@ -1,11 +1,13 @@
 #!/usr/bin/env python3
 
 import os
+
 # Fix numba + matplotlib in read-only Singularity container
-os.environ['NUMBA_CACHE_DIR'] = '/tmp'
-os.environ['MPLCONFIGDIR']     = '/tmp'
+os.environ["NUMBA_CACHE_DIR"] = "/tmp"
+os.environ["MPLCONFIGDIR"] = "/tmp"
 
 import platform
+
 import matplotlib
 import matplotlib.pyplot as plt
 import numpy as np

From cddb5a8c9b5045c284c3784d37574c6b8a61a112 Mon Sep 17 00:00:00 2001
From: dbaku42 <dbaku42@gmail.com>
Date: Wed, 13 May 2026 14:34:38 +0200
Subject: [PATCH 30/38] Fixed pandas series problem in cluster_metrics.py

---
 .../clustermetrics/templates/cluster_metrics.py  | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py b/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py
index f2e0794a58eb..3a2dacd5c518 100644
--- a/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py
+++ b/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py
@@ -88,11 +88,19 @@ def load_features(path: str) -> tuple[pd.DataFrame, pd.Series]:
     return x, sample_ids
 
 
-def load_clusters(path: str) -> pd.Series:
+def load_clusters(path: str) -> tuple[pd.Series, str]:
     df = pd.read_csv(path)
-    if "sample_id" not in df.columns or "cluster" not in df.columns:
-        raise ValueError(f"clusters file must have 'sample_id' and 'cluster' columns. Found: {list(df.columns)}")
-    return df.set_index(df["sample_id"].astype(str))["cluster"].astype(int)
+    if "sample_id" in df.columns and "cluster" in df.columns:
+        series = df.set_index(df["sample_id"].astype(str))["cluster"].astype(int)
+        return series, "sample_id"
+    elif "cluster" in df.columns:
+        series = df["cluster"].astype(int).reset_index(drop=True)
+        return series, "row_order"
+    else:
+        raise ValueError(
+            f"clusters file must have a 'cluster' column (and optionally 'sample_id'). "
+            f"Found: {list(df.columns)}"
+        )
 
 
 def safe_cluster_metrics(x: np.ndarray, labels: np.ndarray) -> dict:

From 4a72291849ec3b90f895c1fcc599ffe39c1dac07 Mon Sep 17 00:00:00 2001
From: dbaku42 <dbaku42@gmail.com>
Date: Wed, 13 May 2026 14:53:04 +0200
Subject: [PATCH 31/38] fix: escape \n in f-strings for Groovy template
 compatibility

---
 .../templates/cluster_metrics.py              | 23 +++++++++----------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py b/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py
index 3a2dacd5c518..fad09dab40b1 100644
--- a/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py
+++ b/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py
@@ -74,7 +74,7 @@ def _normalise_id_column(df: pd.DataFrame) -> pd.DataFrame:
 
 
 def load_features(path: str) -> tuple[pd.DataFrame, pd.Series]:
-    df = pd.read_csv(path, sep="\t", dtype=str)
+    df = pd.read_csv(path, sep="\\t", dtype=str)
     df = _normalise_id_column(df)
 
     if "sample_id" not in df.columns:
@@ -146,37 +146,36 @@ def main() -> None:
     args = ap.parse_args()
 
     x_df, sample_ids = load_features(args.features)
-    clusters_df, cluster_mode = load_clusters(args.clusters)
+    clusters_s, cluster_mode = load_clusters(args.clusters)
 
     if cluster_mode == "sample_id":
-        clusters = clusters_df.set_index("sample_id")["cluster"]
-        common = sample_ids[sample_ids.isin(clusters.index)]
+        common = sample_ids[sample_ids.isin(clusters_s.index)]
 
         if len(common) > 0:
             x = x_df.loc[common.index].values
-            labels = clusters.loc[common.values].values
+            labels = clusters_s.loc[common.values].values
             aligned_ids = common.astype(str).tolist()
             alignment_mode = "sample_id"
-        elif len(clusters_df) == len(sample_ids):
+        elif len(clusters_s) == len(sample_ids):
             x = x_df.values
-            labels = clusters_df["cluster"].values
+            labels = clusters_s.values
             aligned_ids = sample_ids.astype(str).tolist()
             alignment_mode = "row_order_fallback"
         else:
             raise ValueError(
                 f"No overlapping sample_id between features and clusters.\\n"
                 f"  features IDs (first 5): {sample_ids.head().tolist()}\\n"
-                f"  clusters IDs (first 5): {list(clusters.index[:5])}"
+                f"  clusters IDs (first 5): {list(clusters_s.index[:5])}"
             )
     else:
-        if len(clusters_df) != len(sample_ids):
+        if len(clusters_s) != len(sample_ids):
             raise ValueError(
-                "clusters CSV has no usable sample_id column and row counts do not match.\\n"
+                "clusters CSV has no usable sample_id column and row counts do not match.\n"
                 f"  n_features={len(sample_ids)}\\n"
-                f"  n_clusters={len(clusters_df)}"
+                f"  n_clusters={len(clusters_s)}"
             )
         x = x_df.values
-        labels = clusters_df["cluster"].values
+        labels = clusters_s.values
         aligned_ids = sample_ids.astype(str).tolist()
         alignment_mode = "row_order"
 

From 2bf8afac71bec0d7d86ad6644307137a87659bb9 Mon Sep 17 00:00:00 2001
From: dbaku42 <dbaku42@gmail.com>
Date: Wed, 13 May 2026 15:49:01 +0200
Subject: [PATCH 32/38] Format CUSTOM_CLUSTERMETRICS template with ruff; update
 container images

---
 modules/nf-core/custom/clustermetrics/main.nf              | 4 ++--
 .../custom/clustermetrics/templates/cluster_metrics.py     | 7 +++----
 modules/nf-core/custom/clustervisualization/main.nf        | 4 ++--
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/modules/nf-core/custom/clustermetrics/main.nf b/modules/nf-core/custom/clustermetrics/main.nf
index 67802950c6c2..e7fe70ca970e 100644
--- a/modules/nf-core/custom/clustermetrics/main.nf
+++ b/modules/nf-core/custom/clustermetrics/main.nf
@@ -3,8 +3,8 @@ process CUSTOM_CLUSTERMETRICS {
     label 'process_medium'
     conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
-    'docker://community.wave.seqera.io/library/matplotlib_numpy_pandas_python_pruned:6b81abc92579656a' :
-    'community.wave.seqera.io/library/matplotlib_numpy_pandas_python_pruned:6b81abc92579656a' }"
+    'docker://community.wave.seqera.io/library/matplotlib_pandas_python_scikit-learn_pruned:054f91aaa56bd7d5' :
+    'community.wave.seqera.io/library/matplotlib_pandas_python_scikit-learn_pruned:054f91aaa56bd7d5' }"
     input:
     tuple val(meta), path(features), path(clusters)
 
diff --git a/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py b/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py
index fad09dab40b1..f8fb15f2e776 100644
--- a/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py
+++ b/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py
@@ -98,8 +98,7 @@ def load_clusters(path: str) -> tuple[pd.Series, str]:
         return series, "row_order"
     else:
         raise ValueError(
-            f"clusters file must have a 'cluster' column (and optionally 'sample_id'). "
-            f"Found: {list(df.columns)}"
+            f"clusters file must have a 'cluster' column (and optionally 'sample_id'). Found: {list(df.columns)}"
         )
 
 
@@ -170,7 +169,7 @@ def main() -> None:
     else:
         if len(clusters_s) != len(sample_ids):
             raise ValueError(
-                "clusters CSV has no usable sample_id column and row counts do not match.\n"
+                "clusters CSV has no usable sample_id column and row counts do not match.\\n"
                 f"  n_features={len(sample_ids)}\\n"
                 f"  n_clusters={len(clusters_s)}"
             )
@@ -267,7 +266,7 @@ def plot_curve(metric, title, ylabel, out_png):
 
 
 if __name__ == "__main__":
-    prefix = "${task.ext.prefix ?: meta.id}"
+    prefix = "${task.ext.prefix ? task.ext.prefix : meta.id}"
 
     sys.argv = [
         "cluster_metrics.py",
diff --git a/modules/nf-core/custom/clustervisualization/main.nf b/modules/nf-core/custom/clustervisualization/main.nf
index a96b0e342c6b..7d9156a0cb14 100644
--- a/modules/nf-core/custom/clustervisualization/main.nf
+++ b/modules/nf-core/custom/clustervisualization/main.nf
@@ -3,8 +3,8 @@ process CUSTOM_CLUSTERVISUALIZATION {
     label 'process_medium'
     conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
-    'docker://community.wave.seqera.io/library/matplotlib_numpy_pandas_python_pruned:6b81abc92579656a' :
-    'community.wave.seqera.io/library/matplotlib_numpy_pandas_python_pruned:6b81abc92579656a' }"
+    'docker://community.wave.seqera.io/library/matplotlib_pandas_python_scikit-learn_pruned:054f91aaa56bd7d5' :
+    'community.wave.seqera.io/library/matplotlib_pandas_python_scikit-learn_pruned:054f91aaa56bd7d5' }"
     input:
     tuple val(meta), path(features), path(clusters)
 

From fd15709d9e3f33d246b21aca6d778dea1f02f1dd Mon Sep 17 00:00:00 2001
From: dbaku42 <dbaku42@gmail.com>
Date: Wed, 13 May 2026 16:37:10 +0200
Subject: [PATCH 33/38] fix(clustermetrics,clustervisualization): update
 nf-test snapshots and test assertions

---
 .../custom/clustermetrics/tests/main.nf.test  |  54 +++++--
 .../clustermetrics/tests/main.nf.test.snap    | 136 +++++++-----------
 .../clustervisualization/tests/main.nf.test   |  53 ++++---
 .../tests/main.nf.test.snap                   | 129 ++++++++---------
 4 files changed, 187 insertions(+), 185 deletions(-)

diff --git a/modules/nf-core/custom/clustermetrics/tests/main.nf.test b/modules/nf-core/custom/clustermetrics/tests/main.nf.test
index 1d6613fa3709..32f903d2fdeb 100644
--- a/modules/nf-core/custom/clustermetrics/tests/main.nf.test
+++ b/modules/nf-core/custom/clustermetrics/tests/main.nf.test
@@ -1,44 +1,70 @@
 nextflow_process {
-    name "Test Process CLUSTER_METRICS"
+
+    name "Test Process CUSTOM_CLUSTERMETRICS"
     script "../main.nf"
     process "CUSTOM_CLUSTERMETRICS"
+
     tag "modules"
     tag "modules_nfcore"
     tag "custom"
     tag "custom/clustermetrics"
-    tag "clustermetrics"
 
     test("clustermetrics - features and clusters") {
+
         when {
             process {
                 """
-                input[0] = [ [id:'test'], file("${projectDir}/modules/nf-core/custom/clustermetrics/tests/data/test_features.tsv", checkIfExists: true), file("${projectDir}/modules/nf-core/custom/clustermetrics/tests/data/test_clusters.csv", checkIfExists: true) ]
+                input[0] = [
+                    [ id:'test' ],
+                    file("${projectDir}/modules/nf-core/custom/clustermetrics/tests/data/test_features.tsv", checkIfExists: true),
+                    file("${projectDir}/modules/nf-core/custom/clustermetrics/tests/data/test_clusters.csv", checkIfExists: true)
+                ]
                 """
             }
         }
+
         then {
-            assert process.success
-            assert snapshot(
-                process.out.metrics,
-                process.out.k_sweep,
-                process.out.selected,
-                process.out.versions
-            ).match()
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.metrics,
+                    process.out.k_sweep,
+                    process.out.selected,
+                    process.out.versions,
+                    path(process.out.versions[0]).yaml
+                ).match() }
+            )
         }
     }
 
     test("clustermetrics - features and clusters - stub") {
+
         options "-stub"
+
         when {
             process {
                 """
-                input[0] = [ [id:'test'], file("${projectDir}/modules/nf-core/custom/clustermetrics/tests/data/test_features.tsv", checkIfExists: true), file("${projectDir}/modules/nf-core/custom/clustermetrics/tests/data/test_clusters.csv", checkIfExists: true) ]
+                input[0] = [
+                    [ id:'test' ],
+                    file("${projectDir}/modules/nf-core/custom/clustermetrics/tests/data/test_features.tsv", checkIfExists: true),
+                    file("${projectDir}/modules/nf-core/custom/clustermetrics/tests/data/test_clusters.csv", checkIfExists: true)
+                ]
                 """
             }
         }
+
         then {
-            assert process.success
-            assert snapshot(process.out).match()
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.metrics,
+                    process.out.k_sweep,
+                    process.out.selected,
+                    process.out.plots,
+                    process.out.versions,
+                    path(process.out.versions[0]).yaml
+                ).match() }
+            )
         }
     }
-}
+}
\ No newline at end of file
diff --git a/modules/nf-core/custom/clustermetrics/tests/main.nf.test.snap b/modules/nf-core/custom/clustermetrics/tests/main.nf.test.snap
index 15deaf982452..034252978e7f 100644
--- a/modules/nf-core/custom/clustermetrics/tests/main.nf.test.snap
+++ b/modules/nf-core/custom/clustermetrics/tests/main.nf.test.snap
@@ -27,9 +27,17 @@
             ],
             [
                 "versions.yml:md5,236501fe75ac914d4de40a2c42dbec6b"
-            ]
+            ],
+            {
+                "CUSTOM_CLUSTERMETRICS": {
+                    "python": "3.13.7",
+                    "pandas": "3.0.0",
+                    "scikit-learn": "1.8.0",
+                    "matplotlib": "3.10.7"
+                }
+            }
         ],
-        "timestamp": "2026-05-11T13:08:00.102276222",
+        "timestamp": "2026-05-13T16:29:03.258208972",
         "meta": {
             "nf-test": "0.9.5",
             "nextflow": "25.09.0"
@@ -37,90 +45,56 @@
     },
     "clustermetrics - features and clusters - stub": {
         "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test_metrics.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "1": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test_k_sweep.csv:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "2": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test_selected.json:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "3": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        [
-                            "test_calinski.png:md5,d41d8cd98f00b204e9800998ecf8427e",
-                            "test_davies_bouldin.png:md5,d41d8cd98f00b204e9800998ecf8427e",
-                            "test_elbow.png:md5,d41d8cd98f00b204e9800998ecf8427e",
-                            "test_silhouette.png:md5,d41d8cd98f00b204e9800998ecf8427e"
-                        ]
-                    ]
-                ],
-                "4": [
-                    "versions.yml:md5,48951b680bf5fe7b6b0242dd160a2618"
-                ],
-                "k_sweep": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test_k_sweep.csv:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "metrics": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test_metrics.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "plots": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        [
-                            "test_calinski.png:md5,d41d8cd98f00b204e9800998ecf8427e",
-                            "test_davies_bouldin.png:md5,d41d8cd98f00b204e9800998ecf8427e",
-                            "test_elbow.png:md5,d41d8cd98f00b204e9800998ecf8427e",
-                            "test_silhouette.png:md5,d41d8cd98f00b204e9800998ecf8427e"
-                        ]
-                    ]
-                ],
-                "selected": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test_metrics.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test_k_sweep.csv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test_selected.json:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
                     [
-                        {
-                            "id": "test"
-                        },
-                        "test_selected.json:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        "test_calinski.png:md5,d41d8cd98f00b204e9800998ecf8427e",
+                        "test_davies_bouldin.png:md5,d41d8cd98f00b204e9800998ecf8427e",
+                        "test_elbow.png:md5,d41d8cd98f00b204e9800998ecf8427e",
+                        "test_silhouette.png:md5,d41d8cd98f00b204e9800998ecf8427e"
                     ]
-                ],
-                "versions": [
-                    "versions.yml:md5,48951b680bf5fe7b6b0242dd160a2618"
                 ]
+            ],
+            [
+                "versions.yml:md5,48951b680bf5fe7b6b0242dd160a2618"
+            ],
+            {
+                "CUSTOM_CLUSTERMETRICS": {
+                    "python": "3.13.7",
+                    "pandas": "3.0.0",
+                    "scikit-learn": "1.8.0",
+                    "matplotlib": "3.10.7"
+                }
             }
         ],
-        "timestamp": "2026-05-11T13:05:52.932850421",
+        "timestamp": "2026-05-13T16:29:07.20576465",
         "meta": {
             "nf-test": "0.9.5",
             "nextflow": "25.09.0"
diff --git a/modules/nf-core/custom/clustervisualization/tests/main.nf.test b/modules/nf-core/custom/clustervisualization/tests/main.nf.test
index bda59809d0b5..1256c87895f0 100644
--- a/modules/nf-core/custom/clustervisualization/tests/main.nf.test
+++ b/modules/nf-core/custom/clustervisualization/tests/main.nf.test
@@ -1,50 +1,69 @@
 nextflow_process {
+
     name "Test Process CUSTOM_CLUSTERVISUALIZATION"
     script "../main.nf"
     process "CUSTOM_CLUSTERVISUALIZATION"
+
     tag "modules"
     tag "modules_nfcore"
     tag "custom"
     tag "custom/clustervisualization"
-    tag "clustervisualization"
 
     test("clustervisualization - features and clusters") {
+
         when {
-            params {
-                nf_test = true
-             }
             process {
                 """
-                input[0] = [ [id:'test'],
+                input[0] = [
+                    [ id:'test' ],
                     file("${projectDir}/modules/nf-core/custom/clustervisualization/tests/data/test_features.tsv", checkIfExists: true),
-                    file("${projectDir}/modules/nf-core/custom/clustervisualization/tests/data/test_clusters.csv", checkIfExists: true) ]
+                    file("${projectDir}/modules/nf-core/custom/clustervisualization/tests/data/test_clusters.csv", checkIfExists: true)
+                ]
                 """
             }
         }
+
         then {
-            assert process.success
-            assert snapshot(
-                process.out.umap_tsv,
-                process.out.tsne_tsv,
-                process.out.versions
-            ).match()
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.umap_tsv,
+                    process.out.tsne_tsv,
+                    process.out.versions,
+                    path(process.out.versions[0]).yaml
+                ).match() }
+            )
         }
     }
 
     test("clustervisualization - features and clusters - stub") {
+
         options "-stub"
+
         when {
             process {
                 """
-                input[0] = [ [id:'test'],
+                input[0] = [
+                    [ id:'test' ],
                     file("${projectDir}/modules/nf-core/custom/clustervisualization/tests/data/test_features.tsv", checkIfExists: true),
-                    file("${projectDir}/modules/nf-core/custom/clustervisualization/tests/data/test_clusters.csv", checkIfExists: true) ]
+                    file("${projectDir}/modules/nf-core/custom/clustervisualization/tests/data/test_clusters.csv", checkIfExists: true)
+                ]
                 """
             }
         }
+
         then {
-            assert process.success
-            assert snapshot(process.out).match()
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.umap_tsv,
+                    process.out.tsne_tsv,
+                    process.out.umap_png,
+                    process.out.tsne_png,
+                    process.out.versions,
+                    path(process.out.versions[0]).yaml
+                ).match() }
+            )
         }
     }
-}
+}
\ No newline at end of file
diff --git a/modules/nf-core/custom/clustervisualization/tests/main.nf.test.snap b/modules/nf-core/custom/clustervisualization/tests/main.nf.test.snap
index 610912e901ea..f0911139ed24 100644
--- a/modules/nf-core/custom/clustervisualization/tests/main.nf.test.snap
+++ b/modules/nf-core/custom/clustervisualization/tests/main.nf.test.snap
@@ -1,80 +1,53 @@
 {
     "clustervisualization - features and clusters - stub": {
         "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.umap.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "1": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.tsne.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "2": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.umap.png:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "3": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.tsne.png:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "4": [
-                    "versions.yml:md5,a50bc5907c3a28f12238b5497e3f4c67"
-                ],
-                "tsne_png": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.tsne.png:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "tsne_tsv": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.tsne.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "umap_png": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.umap.png:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "umap_tsv": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.umap.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "versions": [
-                    "versions.yml:md5,a50bc5907c3a28f12238b5497e3f4c67"
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.umap.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.tsne.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.umap.png:md5,d41d8cd98f00b204e9800998ecf8427e"
                 ]
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.tsne.png:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            ],
+            [
+                "versions.yml:md5,a50bc5907c3a28f12238b5497e3f4c67"
+            ],
+            {
+                "CUSTOM_CLUSTERVISUALIZATION": {
+                    "python": "3.13.7",
+                    "pandas": "3.0.0",
+                    "matplotlib": "3.10.7",
+                    "seaborn": "0.13.2",
+                    "umap-learn": "0.5.9.post2",
+                    "scikit-learn": "1.8.0"
+                }
             }
         ],
-        "timestamp": "2026-05-11T14:15:13.987219333",
+        "timestamp": "2026-05-13T16:35:48.765737703",
         "meta": {
             "nf-test": "0.9.5",
             "nextflow": "25.09.0"
@@ -100,9 +73,19 @@
             ],
             [
                 "versions.yml:md5,43b533ced227b510ca833d01881efc8e"
-            ]
+            ],
+            {
+                "CUSTOM_CLUSTERVISUALIZATION": {
+                    "python": "3.13.7",
+                    "pandas": "3.0.0",
+                    "matplotlib": "3.10.7",
+                    "seaborn": "0.13.2",
+                    "umap-learn": "0.5.9.post2",
+                    "scikit-learn": "1.8.0"
+                }
+            }
         ],
-        "timestamp": "2026-05-11T14:15:07.547048716",
+        "timestamp": "2026-05-13T16:35:41.90989098",
         "meta": {
             "nf-test": "0.9.5",
             "nextflow": "25.09.0"

From 6114ebd63a83f18d84e3e3f78fcf06ca46dbacde Mon Sep 17 00:00:00 2001
From: dbaku42 <dbaku42@gmail.com>
Date: Wed, 13 May 2026 16:51:18 +0200
Subject: [PATCH 34/38] fix environment conflict

---
 .../custom/clustermetrics/environment.yml     |  7 +++--
 .../clustermetrics/tests/main.nf.test.snap    | 24 +++++++--------
 .../clustervisualization/environment.yml      | 14 +++++----
 .../tests/main.nf.test.snap                   | 30 +++++++++----------
 4 files changed, 39 insertions(+), 36 deletions(-)

diff --git a/modules/nf-core/custom/clustermetrics/environment.yml b/modules/nf-core/custom/clustermetrics/environment.yml
index f4cde46c06ab..ccbc287ac332 100644
--- a/modules/nf-core/custom/clustermetrics/environment.yml
+++ b/modules/nf-core/custom/clustermetrics/environment.yml
@@ -1,3 +1,4 @@
+# clustermetrics/environment.yml
 ---
 # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
 channels:
@@ -6,8 +7,8 @@ channels:
 dependencies:
   - conda-forge::matplotlib=3.9.4
   - conda-forge::numpy=2.4.2
-  - conda-forge::pandas=2.3.2
+  - conda-forge::pandas=2.2.3
   - conda-forge::python=3.12.12
-  - conda-forge::scikit-learn=1.8.0
+  - conda-forge::scikit-learn=1.6.1
   - conda-forge::seaborn=0.13.2
-  - conda-forge::umap-learn=0.5.12
+  - conda-forge::umap-learn=0.5.12
\ No newline at end of file
diff --git a/modules/nf-core/custom/clustermetrics/tests/main.nf.test.snap b/modules/nf-core/custom/clustermetrics/tests/main.nf.test.snap
index 034252978e7f..789c38b0def7 100644
--- a/modules/nf-core/custom/clustermetrics/tests/main.nf.test.snap
+++ b/modules/nf-core/custom/clustermetrics/tests/main.nf.test.snap
@@ -26,18 +26,18 @@
                 ]
             ],
             [
-                "versions.yml:md5,236501fe75ac914d4de40a2c42dbec6b"
+                "versions.yml:md5,67cab9dfa6c955b0927cf3ff6fc8c5bd"
             ],
             {
                 "CUSTOM_CLUSTERMETRICS": {
-                    "python": "3.13.7",
-                    "pandas": "3.0.0",
-                    "scikit-learn": "1.8.0",
-                    "matplotlib": "3.10.7"
+                    "python": "3.12.12",
+                    "pandas": "2.2.3",
+                    "scikit-learn": "1.6.1",
+                    "matplotlib": "3.9.4"
                 }
             }
         ],
-        "timestamp": "2026-05-13T16:29:03.258208972",
+        "timestamp": "2026-05-13T16:49:11.628681612",
         "meta": {
             "nf-test": "0.9.5",
             "nextflow": "25.09.0"
@@ -83,18 +83,18 @@
                 ]
             ],
             [
-                "versions.yml:md5,48951b680bf5fe7b6b0242dd160a2618"
+                "versions.yml:md5,939e81a1c6d66dde0edb847e3e61defd"
             ],
             {
                 "CUSTOM_CLUSTERMETRICS": {
-                    "python": "3.13.7",
-                    "pandas": "3.0.0",
-                    "scikit-learn": "1.8.0",
-                    "matplotlib": "3.10.7"
+                    "python": "3.12.12",
+                    "pandas": "2.2.3",
+                    "scikit-learn": "1.6.1",
+                    "matplotlib": "3.9.4"
                 }
             }
         ],
-        "timestamp": "2026-05-13T16:29:07.20576465",
+        "timestamp": "2026-05-13T16:49:29.799534772",
         "meta": {
             "nf-test": "0.9.5",
             "nextflow": "25.09.0"
diff --git a/modules/nf-core/custom/clustervisualization/environment.yml b/modules/nf-core/custom/clustervisualization/environment.yml
index c2cf95a372d2..8addb3305878 100644
--- a/modules/nf-core/custom/clustervisualization/environment.yml
+++ b/modules/nf-core/custom/clustervisualization/environment.yml
@@ -1,12 +1,14 @@
+# clustervisualization/environment.yml
 ---
 # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
 channels:
   - conda-forge
   - bioconda
 dependencies:
-  - matplotlib=3.9.*
-  - pandas=2.2.*
-  - python=3.12
-  - scikit-learn=1.6.*
-  - seaborn=0.13.*
-  - umap-learn=0.5.*
+  - conda-forge::matplotlib=3.9.4
+  - conda-forge::numpy=2.4.2
+  - conda-forge::pandas=2.2.3
+  - conda-forge::python=3.12.12
+  - conda-forge::scikit-learn=1.6.1
+  - conda-forge::seaborn=0.13.2
+  - conda-forge::umap-learn=0.5.12
\ No newline at end of file
diff --git a/modules/nf-core/custom/clustervisualization/tests/main.nf.test.snap b/modules/nf-core/custom/clustervisualization/tests/main.nf.test.snap
index f0911139ed24..03d8e5832d23 100644
--- a/modules/nf-core/custom/clustervisualization/tests/main.nf.test.snap
+++ b/modules/nf-core/custom/clustervisualization/tests/main.nf.test.snap
@@ -34,20 +34,20 @@
                 ]
             ],
             [
-                "versions.yml:md5,a50bc5907c3a28f12238b5497e3f4c67"
+                "versions.yml:md5,f79a1469aa94553d8d58281262a9d76b"
             ],
             {
                 "CUSTOM_CLUSTERVISUALIZATION": {
-                    "python": "3.13.7",
-                    "pandas": "3.0.0",
-                    "matplotlib": "3.10.7",
+                    "python": "3.12.12",
+                    "pandas": "2.2.3",
+                    "matplotlib": "3.9.4",
                     "seaborn": "0.13.2",
-                    "umap-learn": "0.5.9.post2",
-                    "scikit-learn": "1.8.0"
+                    "umap-learn": "0.5.12",
+                    "scikit-learn": "1.6.1"
                 }
             }
         ],
-        "timestamp": "2026-05-13T16:35:48.765737703",
+        "timestamp": "2026-05-13T16:50:23.618656008",
         "meta": {
             "nf-test": "0.9.5",
             "nextflow": "25.09.0"
@@ -60,7 +60,7 @@
                     {
                         "id": "test"
                     },
-                    "test.umap.tsv:md5,2cba3fa6ba2d3ce80ad884b4210403eb"
+                    "test.umap.tsv:md5,50c3bb50b36a174c55dd45201e9c0036"
                 ]
             ],
             [
@@ -72,20 +72,20 @@
                 ]
             ],
             [
-                "versions.yml:md5,43b533ced227b510ca833d01881efc8e"
+                "versions.yml:md5,c9099db9969c17be8e1f69dfd9ed925a"
             ],
             {
                 "CUSTOM_CLUSTERVISUALIZATION": {
-                    "python": "3.13.7",
-                    "pandas": "3.0.0",
-                    "matplotlib": "3.10.7",
+                    "python": "3.12.12",
+                    "pandas": "2.2.3",
+                    "matplotlib": "3.9.4",
                     "seaborn": "0.13.2",
-                    "umap-learn": "0.5.9.post2",
-                    "scikit-learn": "1.8.0"
+                    "umap-learn": "0.5.12",
+                    "scikit-learn": "1.6.1"
                 }
             }
         ],
-        "timestamp": "2026-05-13T16:35:41.90989098",
+        "timestamp": "2026-05-13T16:49:58.200828019",
         "meta": {
             "nf-test": "0.9.5",
             "nextflow": "25.09.0"

From 8348609a82866bf8edca58f3bc2504482be95222 Mon Sep 17 00:00:00 2001
From: dbaku42 <dbaku42@gmail.com>
Date: Wed, 13 May 2026 19:07:11 +0200
Subject: [PATCH 35/38] feat: add custom clustering and metrics modules

---
 .../nf-core/custom/clustering/environment.yml |  14 ++
 modules/nf-core/custom/clustering/main.nf     |  40 ++++
 modules/nf-core/custom/clustering/meta.yml    |  77 ++++++++
 .../custom/clustering/templates/clustering.py | 184 ++++++++++++++++++
 .../clustering/tests/data/test.eigenvec       |   6 +
 .../custom/clustering/tests/main.nf.test      |  51 +++++
 .../custom/clustering/tests/main.nf.test.snap |  79 ++++++++
 7 files changed, 451 insertions(+)
 create mode 100644 modules/nf-core/custom/clustering/environment.yml
 create mode 100644 modules/nf-core/custom/clustering/main.nf
 create mode 100644 modules/nf-core/custom/clustering/meta.yml
 create mode 100644 modules/nf-core/custom/clustering/templates/clustering.py
 create mode 100644 modules/nf-core/custom/clustering/tests/data/test.eigenvec
 create mode 100644 modules/nf-core/custom/clustering/tests/main.nf.test
 create mode 100644 modules/nf-core/custom/clustering/tests/main.nf.test.snap

diff --git a/modules/nf-core/custom/clustering/environment.yml b/modules/nf-core/custom/clustering/environment.yml
new file mode 100644
index 000000000000..4b7a89234526
--- /dev/null
+++ b/modules/nf-core/custom/clustering/environment.yml
@@ -0,0 +1,14 @@
+# clustering/environment.yml
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - conda-forge::matplotlib=3.9.4
+  - conda-forge::numpy=2.4.2
+  - conda-forge::pandas=2.2.3
+  - conda-forge::python=3.12.12
+  - conda-forge::scikit-learn=1.6.1
+  - conda-forge::seaborn=0.13.2
+  - conda-forge::umap-learn=0.5.12
diff --git a/modules/nf-core/custom/clustering/main.nf b/modules/nf-core/custom/clustering/main.nf
new file mode 100644
index 000000000000..2f6f778dc08c
--- /dev/null
+++ b/modules/nf-core/custom/clustering/main.nf
@@ -0,0 +1,40 @@
+process CUSTOM_CLUSTERING {
+    tag "$meta.id"
+    label 'process_medium'
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
+    'docker://community.wave.seqera.io/library/matplotlib_pandas_python_scikit-learn_pruned:054f91aaa56bd7d5' :
+    'community.wave.seqera.io/library/matplotlib_pandas_python_scikit-learn_pruned:054f91aaa56bd7d5' }"
+
+    input:
+    tuple val(meta), path(eigenvec)
+    val algorithm
+    val n_clusters
+    val dbscan_eps
+    val dbscan_min_samples
+
+    output:
+    tuple val(meta), path("*_clusters.csv")         , emit: clusters
+    tuple val(meta), path("*_clustering_info.json") , emit: info, optional: true
+    path "versions.yml"                             , emit: versions, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    template 'clustering.py'
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}_clusters.csv
+    touch ${prefix}_clustering_info.json
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python3 --version | sed 's/Python //')
+        pandas: \$(python3 -c "import pandas; print(pandas.__version__)")
+        scikit-learn: \$(python3 -c "import sklearn; print(sklearn.__version__)")
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/custom/clustering/meta.yml b/modules/nf-core/custom/clustering/meta.yml
new file mode 100644
index 000000000000..7eb83d610dfe
--- /dev/null
+++ b/modules/nf-core/custom/clustering/meta.yml
@@ -0,0 +1,77 @@
+name: "CUSTOM_CLUSTERING"
+description: "Performs KMeans or DBSCAN clustering on principal components from PLINK2
+  --pca"
+keywords:
+  - clustering
+  - pca
+  - kmeans
+  - dbscan
+  - principal-components
+tools:
+  - "scikit-learn":
+      description: "Machine learning library for clustering"
+      homepage: "https://scikit-learn.org/"
+      documentation: "https://scikit-learn.org/stable/modules/clustering.html"
+      licence:
+        - "BSD-3-Clause"
+      identifier: ""
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1' ]`
+    - eigenvec:
+        type: file
+        description: PLINK2 .eigenvec file generated by --pca
+        pattern: "*.eigenvec"
+        ontologies: []
+  - algorithm:
+      type: string
+      description: Clustering algorithm to use (kmeans or dbscan)
+  - n_clusters:
+      type: integer
+      description: Number of clusters for KMeans
+  - dbscan_eps:
+      type: float
+      description: Epsilon parameter for DBSCAN
+  - dbscan_min_samples:
+      type: integer
+      description: Minimum samples parameter for DBSCAN
+output:
+  clusters:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information
+      - "*_clusters.csv":
+          type: file
+          description: CSV file with sample_id and assigned cluster
+          pattern: "*_clusters.csv"
+          ontologies:
+            - edam: http://edamontology.org/format_3752
+  info:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information
+      - "*_clustering_info.json":
+          type: file
+          description: JSON file with clustering parameters and statistics
+          pattern: "*_clustering_info.json"
+          ontologies:
+            - edam: http://edamontology.org/format_3464
+  versions:
+    - "versions.yml":
+        type: file
+        description: File containing software versions
+        pattern: "versions.yml"
+        ontologies:
+          - edam: http://edamontology.org/format_3750
+topics:
+  versions:
+    - versions.yml:
+        type: string
+        description: The name of the process
+authors:
+  - "@dbaku42"
+maintainers:
+  - "@dbaku42"
diff --git a/modules/nf-core/custom/clustering/templates/clustering.py b/modules/nf-core/custom/clustering/templates/clustering.py
new file mode 100644
index 000000000000..5175b294a86c
--- /dev/null
+++ b/modules/nf-core/custom/clustering/templates/clustering.py
@@ -0,0 +1,184 @@
+#!/usr/bin/env python3
+
+import json
+import platform
+import sklearn
+import yaml
+import re
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+from sklearn.cluster import KMeans, DBSCAN
+
+PC_COL_RE = re.compile('[Pp][Cc][0-9]+', re.IGNORECASE)
+
+
+def convert_eigenvec_to_tsv(eigenvec_path, out_pca, id_mode='iid'):
+    rows = []
+    n_pcs = 0
+    mode = None
+
+    with eigenvec_path.open('r') as fh:
+        for line in fh:
+            line = line.strip()
+            if not line:
+                continue
+            parts = line.split()
+            if parts[0].startswith('#'):
+                header = [p.lstrip('#') for p in parts]
+                if len(header) >= 2 and header[0].upper() == 'FID' and header[1].upper() == 'IID':
+                    mode = 'fid_iid'
+                elif header[0].upper() == 'IID':
+                    mode = 'iid_only'
+                continue
+            if mode is None:
+                try:
+                    float(parts[1])
+                    mode = 'iid_only'
+                except (ValueError, IndexError):
+                    mode = 'fid_iid'
+            if mode == 'fid_iid':
+                if len(parts) < 3:
+                    continue
+                fid = parts[0]
+                iid = parts[1]
+                pcs = parts[2:]
+                sample_id = iid if id_mode == 'iid' else f'{fid}:{iid}'
+            elif mode == 'iid_only':
+                if len(parts) < 2:
+                    continue
+                iid = parts[0]
+                pcs = parts[1:]
+                sample_id = iid
+            else:
+                raise ValueError(f'Unrecognized eigenvec format in {eigenvec_path}')
+            if n_pcs == 0:
+                n_pcs = len(pcs)
+            rows.append((sample_id, pcs))
+
+    if not rows:
+        raise ValueError(f'No valid data found in {eigenvec_path}')
+
+    header = ['sample_id'] + [f'PC{i+1}' for i in range(n_pcs)]
+    with out_pca.open('w') as fh:
+        fh.write('\\t'.join(header) + '\\n')
+        for sample_id, pcs in rows:
+            fh.write(sample_id + '\\t' + '\\t'.join(pcs) + '\\n')
+
+    print(f'[INFO] Converted {len(rows)} samples with {n_pcs} PCs -> {out_pca}')
+    return n_pcs
+
+
+def read_table_robust(path):
+    df = pd.read_csv(path, sep='\\t', dtype=str)
+    print(f'[DEBUG] Initial read: {df.shape[0]} rows x {df.shape[1]} cols', flush=True)
+    col_names_upper = set(str(c).upper() for c in df.columns)
+
+    def is_header_row(row):
+        row_values_upper = [str(v).upper() for v in row.values]
+        overlap = sum(1 for v in row_values_upper if v in col_names_upper)
+        if overlap >= 3:
+            return True
+        header_keywords = {'FID', 'IID', 'PC1', 'PC2', 'PC3'}
+        if sum(1 for v in row_values_upper if v in header_keywords) >= 2:
+            return True
+        return False
+
+    bad_rows = df.apply(is_header_row, axis=1)
+    if bad_rows.any():
+        n_bad = int(bad_rows.sum())
+        print(f'[INFO] Removed {n_bad} duplicate header row(s)', flush=True)
+        df = df[~bad_rows].copy().reset_index(drop=True)
+
+    print(f'[INFO] After cleanup: {df.shape[0]} rows x {df.shape[1]} cols', flush=True)
+    return df
+
+
+def build_sample_id(df):
+    """Split df into (sample-id Series, frame without identifier columns).
+
+    Lookup order: literal 'sample_id', then IID (case-insensitive; a FID column
+    is dropped with it), then the first non-PC column, else synthetic ids.
+    """
+    cols = list(df.columns)
+    if 'sample_id' in df.columns:
+        return df['sample_id'].astype(str), df.drop(columns=['sample_id'])
+    iid_candidates = [c for c in cols if str(c).upper() == 'IID']
+    if iid_candidates:
+        # FID must go too; the old FID+IID branch sat after this return and never ran.
+        fid_candidates = [c for c in cols if str(c).upper() == 'FID']
+        id_cols = [iid_candidates[0]] + fid_candidates[:1]
+        return df[id_cols[0]].astype(str), df.drop(columns=id_cols)
+    non_pc_cols = [c for c in cols if not PC_COL_RE.match(str(c))]
+    if non_pc_cols:
+        id_col = non_pc_cols[0]
+        return df[id_col].astype(str), df.drop(columns=[id_col])
+    return pd.Series([f'sample_{i}' for i in range(len(df))], index=df.index), df
+
+
+def main():
+    """Cluster the PCs of the staged eigenvec file; write the clusters CSV, info JSON and versions.yml."""
+    # Honour task.ext.prefix like the stub does (Nextflow renders an unset ext value as 'null').
+    prefix = '${task.ext.prefix}' if '${task.ext.prefix}' != 'null' else '${meta.id}'
+    algorithm = '${algorithm}'.strip().lower()
+    pca_tsv = Path(f'{prefix}_pca_scores.tsv')
+    convert_eigenvec_to_tsv(Path('${eigenvec}'), pca_tsv, 'iid')
+    df = read_table_robust(str(pca_tsv))
+    sample_ids, df_feats = build_sample_id(df)
+
+    pc_cols = [c for c in df_feats.columns if PC_COL_RE.match(str(c))]
+    if not pc_cols:
+        raise ValueError('No PC columns found in input')
+    X = df_feats[pc_cols].apply(pd.to_numeric, errors='coerce').values
+    if np.isnan(X).any():
+        raise ValueError('NaN values detected in PCA data')
+    print(f'[INFO] Loaded {X.shape[0]} samples x {X.shape[1]} principal components', flush=True)
+
+    if algorithm == 'kmeans':
+        model = KMeans(n_clusters=${n_clusters}, init='random', n_init=100, random_state=42)
+        labels = model.fit_predict(X)
+        info = {'algorithm': 'kmeans', 'k': ${n_clusters}, 'inertia': float(model.inertia_)}
+    elif algorithm == 'dbscan':
+        model = DBSCAN(eps=${dbscan_eps}, min_samples=${dbscan_min_samples})
+        labels = model.fit_predict(X)
+        n_found = len(set(labels)) - (1 if -1 in labels else 0)
+        n_noise = int(np.sum(labels == -1))
+        info = {
+            'algorithm': 'dbscan',
+            'eps': ${dbscan_eps},
+            'min_samples': ${dbscan_min_samples},
+            'n_clusters_found': int(n_found),
+            'n_noise': n_noise
+        }
+    else:
+        raise ValueError(f'Unsupported algorithm {algorithm!r}: expected kmeans or dbscan')
+
+    out_clusters = f'{prefix}_clusters.csv'
+    out_info = f'{prefix}_clustering_info.json'
+
+    pd.DataFrame({'sample_id': sample_ids.astype(str), 'cluster': labels}).to_csv(out_clusters, index=False)
+    info.update({
+        'n_samples': int(X.shape[0]),
+        'n_features': int(X.shape[1]),
+        'feature_names': pc_cols,
+        'input_file': Path('${eigenvec}').name
+    })
+    Path(out_info).write_text(json.dumps(info, indent=2))
+    print('[SUCCESS] Clustering completed:')
+    print(f'   -> Clusters : {out_clusters}')
+    print(f'   -> Info     : {out_info}')
+
+    # Key on the runtime process name, as the stub does, so aliased includes report correctly.
+    versions = {
+        '${task.process}': {
+            'python': platform.python_version(),
+            'scikit-learn': sklearn.__version__,
+            'pandas': pd.__version__,
+            'numpy': np.__version__,
+        }
+    }
+    with open('versions.yml', 'w') as fh:
+        fh.write(yaml.dump(versions, default_flow_style=False))
+
+main()
diff --git a/modules/nf-core/custom/clustering/tests/data/test.eigenvec b/modules/nf-core/custom/clustering/tests/data/test.eigenvec
new file mode 100644
index 000000000000..d0281ae180ce
--- /dev/null
+++ b/modules/nf-core/custom/clustering/tests/data/test.eigenvec
@@ -0,0 +1,6 @@
+#FID	IID	PC1	PC2	PC3
+0	sample01	0.1234	0.5678	0.9012
+0	sample02	-0.2345	0.6789	-0.0123
+0	sample03	0.3456	-0.7890	0.1234
+0	sample04	-0.4567	0.8901	-0.2345
+0	sample05	0.5678	-0.9012	0.3456
diff --git a/modules/nf-core/custom/clustering/tests/main.nf.test b/modules/nf-core/custom/clustering/tests/main.nf.test
new file mode 100644
index 000000000000..c590a235983c
--- /dev/null
+++ b/modules/nf-core/custom/clustering/tests/main.nf.test
@@ -0,0 +1,51 @@
+nextflow_process {
+    name "Test Process CUSTOM_CLUSTERING"
+    script "../main.nf"
+    process "CUSTOM_CLUSTERING"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "custom"
+    tag "custom/clustering"
+
+    test("clustering - eigenvec") {
+        when {
+            process {
+                """
+                input[0] = [ [id:'test'], file("${projectDir}/modules/nf-core/custom/clustering/tests/data/test.eigenvec", checkIfExists: true) ]
+                input[1] = 'kmeans'
+                input[2] = 3
+                input[3] = 0.5
+                input[4] = 5
+                """
+            }
+        }
+        then {
+            assert process.success
+            assert snapshot(
+                process.out.clusters,
+                process.out.info,
+                process.out.versions
+            ).match()
+        }
+    }
+
+    test("clustering - eigenvec - stub") {
+        options "-stub"
+        when {
+            process {
+                """
+                input[0] = [ [id:'test'], file("${projectDir}/modules/nf-core/custom/clustering/tests/data/test.eigenvec", checkIfExists: true) ]
+                input[1] = 'kmeans'
+                input[2] = 3
+                input[3] = 0.5
+                input[4] = 5
+                """
+            }
+        }
+        then {
+            assert process.success
+            assert snapshot(process.out).match()
+        }
+    }
+}
diff --git a/modules/nf-core/custom/clustering/tests/main.nf.test.snap b/modules/nf-core/custom/clustering/tests/main.nf.test.snap
new file mode 100644
index 000000000000..b852b800bcb4
--- /dev/null
+++ b/modules/nf-core/custom/clustering/tests/main.nf.test.snap
@@ -0,0 +1,79 @@
+{
+    "clustering - eigenvec - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_clusters.csv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_clustering_info.json:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "2": [
+                    "versions.yml:md5,664d3210ebe520f6f680bb7c41d9b15e"
+                ],
+                "clusters": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_clusters.csv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "info": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_clustering_info.json:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,664d3210ebe520f6f680bb7c41d9b15e"
+                ]
+            }
+        ],
+        "timestamp": "2026-05-13T18:21:30.37624233",
+        "meta": {
+            "nf-test": "0.9.5",
+            "nextflow": "25.09.0"
+        }
+    },
+    "clustering - eigenvec": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test_clusters.csv:md5,a0ce7a662fecdb42e15e2b2aa0906cf4"
+                ]
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test_clustering_info.json:md5,6e61eece1d6cad24489312531115e55a"
+                ]
+            ],
+            [
+                "versions.yml:md5,a5f57bd446ec1ba732607243bebd93fc"
+            ]
+        ],
+        "timestamp": "2026-05-13T18:53:15.637280975",
+        "meta": {
+            "nf-test": "0.9.5",
+            "nextflow": "25.09.0"
+        }
+    }
+}
\ No newline at end of file

From b33105dfbd7fe3ce0bfef283ad214a930eeb8490 Mon Sep 17 00:00:00 2001
From: dbaku42 <dbaku42@gmail.com>
Date: Wed, 13 May 2026 22:18:48 +0200
Subject: [PATCH 36/38] chore: remove extra custom modules from clustering PR

---
 .../custom/clustermetrics/environment.yml     |  14 -
 modules/nf-core/custom/clustermetrics/main.nf |  43 ---
 .../nf-core/custom/clustermetrics/meta.yml    |  90 ------
 .../templates/cluster_metrics.py              | 289 ------------------
 .../tests/data/test_clusters.csv              |   6 -
 .../tests/data/test_features.tsv              |   6 -
 .../custom/clustermetrics/tests/main.nf.test  |  70 -----
 .../clustermetrics/tests/main.nf.test.snap    | 103 -------
 .../clustervisualization/environment.yml      |  14 -
 .../custom/clustervisualization/main.nf       |  42 ---
 .../custom/clustervisualization/meta.yml      |  96 ------
 .../templates/cluster_viz.py                  | 234 --------------
 .../tests/data/test_clusters.csv              |   6 -
 .../tests/data/test_features.tsv              |   6 -
 .../tests/data/test_pca.eigenvec              |   6 -
 .../clustervisualization/tests/main.nf.test   |  69 -----
 .../tests/main.nf.test.snap                   |  94 ------
 17 files changed, 1188 deletions(-)
 delete mode 100644 modules/nf-core/custom/clustermetrics/environment.yml
 delete mode 100644 modules/nf-core/custom/clustermetrics/main.nf
 delete mode 100644 modules/nf-core/custom/clustermetrics/meta.yml
 delete mode 100644 modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py
 delete mode 100644 modules/nf-core/custom/clustermetrics/tests/data/test_clusters.csv
 delete mode 100644 modules/nf-core/custom/clustermetrics/tests/data/test_features.tsv
 delete mode 100644 modules/nf-core/custom/clustermetrics/tests/main.nf.test
 delete mode 100644 modules/nf-core/custom/clustermetrics/tests/main.nf.test.snap
 delete mode 100644 modules/nf-core/custom/clustervisualization/environment.yml
 delete mode 100644 modules/nf-core/custom/clustervisualization/main.nf
 delete mode 100644 modules/nf-core/custom/clustervisualization/meta.yml
 delete mode 100644 modules/nf-core/custom/clustervisualization/templates/cluster_viz.py
 delete mode 100644 modules/nf-core/custom/clustervisualization/tests/data/test_clusters.csv
 delete mode 100644 modules/nf-core/custom/clustervisualization/tests/data/test_features.tsv
 delete mode 100644 modules/nf-core/custom/clustervisualization/tests/data/test_pca.eigenvec
 delete mode 100644 modules/nf-core/custom/clustervisualization/tests/main.nf.test
 delete mode 100644 modules/nf-core/custom/clustervisualization/tests/main.nf.test.snap

diff --git a/modules/nf-core/custom/clustermetrics/environment.yml b/modules/nf-core/custom/clustermetrics/environment.yml
deleted file mode 100644
index ccbc287ac332..000000000000
--- a/modules/nf-core/custom/clustermetrics/environment.yml
+++ /dev/null
@@ -1,14 +0,0 @@
-# clustermetrics/environment.yml
----
-# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
-channels:
-  - conda-forge
-  - bioconda
-dependencies:
-  - conda-forge::matplotlib=3.9.4
-  - conda-forge::numpy=2.4.2
-  - conda-forge::pandas=2.2.3
-  - conda-forge::python=3.12.12
-  - conda-forge::scikit-learn=1.6.1
-  - conda-forge::seaborn=0.13.2
-  - conda-forge::umap-learn=0.5.12
\ No newline at end of file
diff --git a/modules/nf-core/custom/clustermetrics/main.nf b/modules/nf-core/custom/clustermetrics/main.nf
deleted file mode 100644
index e7fe70ca970e..000000000000
--- a/modules/nf-core/custom/clustermetrics/main.nf
+++ /dev/null
@@ -1,43 +0,0 @@
-process CUSTOM_CLUSTERMETRICS {
-    tag "$meta.id"
-    label 'process_medium'
-    conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
-    'docker://community.wave.seqera.io/library/matplotlib_pandas_python_scikit-learn_pruned:054f91aaa56bd7d5' :
-    'community.wave.seqera.io/library/matplotlib_pandas_python_scikit-learn_pruned:054f91aaa56bd7d5' }"
-    input:
-    tuple val(meta), path(features), path(clusters)
-
-    output:
-    tuple val(meta), path("*_metrics.tsv")     , emit: metrics
-    tuple val(meta), path("*_k_sweep.csv")     , emit: k_sweep
-    tuple val(meta), path("*_selected.json")   , emit: selected
-    tuple val(meta), path("*.png")             , emit: plots, optional: true
-    path "versions.yml"                        , emit: versions, topic: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    template 'cluster_metrics.py'
-
-    stub:
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    """
-    touch ${prefix}_metrics.tsv
-    touch ${prefix}_k_sweep.csv
-    touch ${prefix}_selected.json
-    touch ${prefix}_elbow.png
-    touch ${prefix}_silhouette.png
-    touch ${prefix}_davies_bouldin.png
-    touch ${prefix}_calinski.png
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        python: \$(python3 --version | sed 's/Python //')
-        pandas: \$(python3 -c "import pandas; print(pandas.__version__)")
-        scikit-learn: \$(python3 -c "import sklearn; print(sklearn.__version__)")
-        matplotlib: \$(python3 -c "import matplotlib; print(matplotlib.__version__)")
-    END_VERSIONS
-    """
-}
diff --git a/modules/nf-core/custom/clustermetrics/meta.yml b/modules/nf-core/custom/clustermetrics/meta.yml
deleted file mode 100644
index 432d56069427..000000000000
--- a/modules/nf-core/custom/clustermetrics/meta.yml
+++ /dev/null
@@ -1,90 +0,0 @@
-name: "CUSTOM_CLUSTERMETRICS"
-description: "Computes clustering quality metrics (silhouette, Calinski-Harabasz,
-  Davies-Bouldin) and performs k-sweep analysis"
-keywords:
-  - clustering
-  - metrics
-  - silhouette
-  - calinski-harabasz
-  - davies-bouldin
-  - evaluation
-tools:
-  - "scikit-learn":
-      description: "Machine learning library for clustering metrics"
-      homepage: "https://scikit-learn.org/"
-      documentation: "https://scikit-learn.org/stable/modules/clustering.html"
-      licence:
-        - "BSD-3-Clause"
-      identifier: ""
-input:
-  - - meta:
-        type: map
-        description: |
-          Groovy Map containing sample information
-          e.g. `[ id:'sample1' ]`
-    - features:
-        type: file
-        description: Feature matrix file
-        pattern: "*"
-        ontologies: []
-    - clusters:
-        type: file
-        description: Cluster assignment file
-        pattern: "*"
-        ontologies: []
-output:
-  metrics:
-    - - meta:
-          type: map
-          description: Groovy Map containing sample information
-      - "*_metrics.tsv":
-          type: file
-          description: TSV with selected cluster quality metrics
-          pattern: "*_metrics.tsv"
-          ontologies:
-            - edam: http://edamontology.org/format_3475
-  k_sweep:
-    - - meta:
-          type: map
-          description: Groovy Map containing sample information
-      - "*_k_sweep.csv":
-          type: file
-          description: CSV with metrics for different values of k
-          pattern: "*_k_sweep.csv"
-          ontologies:
-            - edam: http://edamontology.org/format_3752
-  selected:
-    - - meta:
-          type: map
-          description: Groovy Map containing sample information
-      - "*_selected.json":
-          type: file
-          description: JSON with the selected/best metrics
-          pattern: "*_selected.json"
-          ontologies:
-            - edam: http://edamontology.org/format_3464
-  plots:
-    - - meta:
-          type: map
-          description: Groovy Map containing sample information
-      - "*.png":
-          type: file
-          description: Optional PNG plots (elbow, silhouette, etc.)
-          pattern: "*.png"
-          ontologies: []
-  versions:
-    - "versions.yml":
-        type: file
-        description: File containing software versions
-        pattern: "versions.yml"
-        ontologies:
-          - edam: http://edamontology.org/format_3750
-topics:
-  versions:
-    - versions.yml:
-        type: string
-        description: The name of the process
-authors:
-  - "@dbaku42"
-maintainers:
-  - "@dbaku42"
diff --git a/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py b/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py
deleted file mode 100644
index f8fb15f2e776..000000000000
--- a/modules/nf-core/custom/clustermetrics/templates/cluster_metrics.py
+++ /dev/null
@@ -1,289 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-import json
-import platform
-import sys
-from pathlib import Path
-
-import matplotlib
-import numpy as np
-import pandas as pd
-import sklearn
-from sklearn.cluster import KMeans
-from sklearn.metrics import (
-    calinski_harabasz_score,
-    davies_bouldin_score,
-    silhouette_score,
-)
-
-matplotlib.use("Agg")
-
-
-def format_yaml_like(data: dict, indent: int = 0) -> str:
-    """Formats a dictionary to a YAML-like string (nf-core standard)."""
-    yaml_str = ""
-    for key, value in data.items():
-        spaces = "  " * indent
-        if isinstance(value, dict):
-            yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}"
-        else:
-            yaml_str += f"{spaces}{key}: {value}\\n"
-    return yaml_str
-
-
-def _normalise_id_column(df: pd.DataFrame) -> pd.DataFrame:
-    df = df.copy()
-    df.columns = [str(c).lstrip("#") for c in df.columns]
-
-    cols_upper = {str(c).upper(): c for c in df.columns}
-
-    if "IID" in cols_upper:
-        iid_col = cols_upper["IID"]
-        dup_mask = df[iid_col].astype(str).str.upper().isin({"FID", "IID"})
-        if dup_mask.any():
-            df = df.loc[~dup_mask].copy().reset_index(drop=True)
-
-    cols_upper = {str(c).upper(): c for c in df.columns}
-
-    if "SAMPLE_ID" in cols_upper:
-        sample_col = cols_upper["SAMPLE_ID"]
-        if sample_col != "sample_id":
-            df = df.rename(columns={sample_col: "sample_id"})
-        return df
-
-    if "IID" in cols_upper:
-        iid_col = cols_upper["IID"]
-        iid_numeric = pd.to_numeric(df[iid_col], errors="coerce").notna().all()
-
-        if iid_numeric:
-            df = df.drop(columns=[iid_col])
-            if len(df.columns) == 0:
-                raise ValueError("Cannot infer sample_id after dropping numeric IID column")
-            df = df.rename(columns={df.columns[0]: "sample_id"})
-        else:
-            df = df.rename(columns={iid_col: "sample_id"})
-
-        fid_cols = [c for c in df.columns if str(c).upper() == "FID"]
-        if fid_cols:
-            df = df.drop(columns=fid_cols)
-
-        return df
-
-    raise ValueError(f"Cannot find sample ID column (expected 'sample_id' or 'IID'). Found: {list(df.columns)}")
-
-
-def load_features(path: str) -> tuple[pd.DataFrame, pd.Series]:
-    df = pd.read_csv(path, sep="\\t", dtype=str)
-    df = _normalise_id_column(df)
-
-    if "sample_id" not in df.columns:
-        raise ValueError("features file must contain a sample_id column after normalization")
-
-    sample_ids = df["sample_id"].astype(str)
-    x = df.drop(columns=["sample_id"]).apply(pd.to_numeric, errors="coerce")
-    x = x.fillna(x.mean(numeric_only=True))
-    x = x.fillna(0.0)
-
-    return x, sample_ids
-
-
-def load_clusters(path: str) -> tuple[pd.Series, str]:
-    df = pd.read_csv(path)
-    if "sample_id" in df.columns and "cluster" in df.columns:
-        series = df.set_index(df["sample_id"].astype(str))["cluster"].astype(int)
-        return series, "sample_id"
-    elif "cluster" in df.columns:
-        series = df["cluster"].astype(int).reset_index(drop=True)
-        return series, "row_order"
-    else:
-        raise ValueError(
-            f"clusters file must have a 'cluster' column (and optionally 'sample_id'). Found: {list(df.columns)}"
-        )
-
-
-def safe_cluster_metrics(x: np.ndarray, labels: np.ndarray) -> dict:
-    uniq = np.unique(labels)
-    n_clusters = len(uniq) - (1 if -1 in uniq else 0)
-
-    if n_clusters < 2:
-        return {
-            "n_clusters": int(n_clusters),
-            "silhouette": None,
-            "calinski_harabasz": None,
-            "davies_bouldin": None,
-        }
-
-    mask = labels != -1
-    x_use, y_use = x[mask], labels[mask]
-
-    if len(x_use) < 2 or len(np.unique(y_use)) < 2:
-        return {
-            "n_clusters": int(n_clusters),
-            "silhouette": None,
-            "calinski_harabasz": None,
-            "davies_bouldin": None,
-        }
-
-    return {
-        "n_clusters": int(n_clusters),
-        "silhouette": float(silhouette_score(x_use, y_use)),
-        "calinski_harabasz": float(calinski_harabasz_score(x_use, y_use)),
-        "davies_bouldin": float(davies_bouldin_score(x_use, y_use)),
-    }
-
-
-def main() -> None:
-    ap = argparse.ArgumentParser()
-    ap.add_argument("--features", required=True)
-    ap.add_argument("--clusters", required=True)
-    ap.add_argument("--k-min", type=int, default=2)
-    ap.add_argument("--k-max", type=int, default=12)
-    ap.add_argument("--out-k-sweep", required=True)
-    ap.add_argument("--out-selected", required=True)
-    ap.add_argument("--out-prefix", required=True)
-    args = ap.parse_args()
-
-    x_df, sample_ids = load_features(args.features)
-    clusters_s, cluster_mode = load_clusters(args.clusters)
-
-    if cluster_mode == "sample_id":
-        common = sample_ids[sample_ids.isin(clusters_s.index)]
-
-        if len(common) > 0:
-            x = x_df.loc[common.index].values
-            labels = clusters_s.loc[common.values].values
-            aligned_ids = common.astype(str).tolist()
-            alignment_mode = "sample_id"
-        elif len(clusters_s) == len(sample_ids):
-            x = x_df.values
-            labels = clusters_s.values
-            aligned_ids = sample_ids.astype(str).tolist()
-            alignment_mode = "row_order_fallback"
-        else:
-            raise ValueError(
-                f"No overlapping sample_id between features and clusters.\\n"
-                f"  features IDs (first 5): {sample_ids.head().tolist()}\\n"
-                f"  clusters IDs (first 5): {list(clusters_s.index[:5])}"
-            )
-    else:
-        if len(clusters_s) != len(sample_ids):
-            raise ValueError(
-                "clusters CSV has no usable sample_id column and row counts do not match.\\n"
-                f"  n_features={len(sample_ids)}\\n"
-                f"  n_clusters={len(clusters_s)}"
-            )
-        x = x_df.values
-        labels = clusters_s.values
-        aligned_ids = sample_ids.astype(str).tolist()
-        alignment_mode = "row_order"
-
-    if len(x) < 2:
-        raise ValueError("Need at least 2 samples to compute cluster metrics")
-
-    selected = safe_cluster_metrics(x, labels)
-    selected["input_clusters"] = Path(args.clusters).name
-    selected["input_features"] = Path(args.features).name
-    selected["n_samples_used"] = int(len(aligned_ids))
-    selected["alignment_mode"] = alignment_mode
-
-    metrics_tsv = f"{args.out_prefix}_metrics.tsv"
-    pd.DataFrame([selected]).to_csv(metrics_tsv, sep="\\t", index=False)
-
-    rows = []
-    max_k = min(int(args.k_max), len(x))
-    for k in range(int(args.k_min), max_k + 1):
-        model = KMeans(n_clusters=k, n_init=10, random_state=42)
-        y = model.fit_predict(x)
-
-        sil = ch = db = None
-        if 1 < len(np.unique(y)) < len(x):
-            sil = float(silhouette_score(x, y))
-            ch = float(calinski_harabasz_score(x, y))
-            db = float(davies_bouldin_score(x, y))
-
-        rows.append(
-            {
-                "k": k,
-                "inertia": float(model.inertia_),
-                "silhouette": sil,
-                "calinski_harabasz": ch,
-                "davies_bouldin": db,
-            }
-        )
-
-    sweep_df = pd.DataFrame(rows)
-    sweep_df.to_csv(args.out_k_sweep, sep=",", index=False, float_format="%.10g")
-    Path(args.out_selected).write_text(json.dumps(selected, indent=2))
-
-    pfx = args.out_prefix
-    try:
-        import matplotlib.pyplot as plt
-
-        def plot_curve(metric, title, ylabel, out_png):
-            plt.figure(figsize=(7, 4.5))
-            vals = sweep_df[metric].dropna()
-            ks = sweep_df.loc[vals.index, "k"]
-            plt.plot(ks, vals, marker="o")
-            plt.xticks(sweep_df["k"].tolist())
-            plt.title(title)
-            plt.xlabel("k")
-            plt.ylabel(ylabel)
-            plt.tight_layout()
-            plt.savefig(out_png, dpi=200)
-            plt.close()
-
-        if not sweep_df.empty:
-            plot_curve("inertia", "Elbow method (KMeans inertia)", "inertia", f"{pfx}_elbow.png")
-            plot_curve("silhouette", "Silhouette score (higher is better)", "silhouette", f"{pfx}_silhouette.png")
-            plot_curve(
-                "davies_bouldin",
-                "Davies-Bouldin index (lower is better)",
-                "davies_bouldin",
-                f"{pfx}_davies_bouldin.png",
-            )
-            plot_curve(
-                "calinski_harabasz",
-                "Calinski-Harabasz index (higher is better)",
-                "calinski_harabasz",
-                f"{pfx}_calinski.png",
-            )
-
-    except Exception as e:
-        Path("plot_warning.txt").write_text("Plotting failed: " + str(e) + "\\n")
-
-    # === VERSIONS.YML (fix review) ===
-    versions = {
-        "${task.process}": {
-            "python": platform.python_version(),
-            "pandas": pd.__version__,
-            "scikit-learn": sklearn.__version__,
-            "matplotlib": matplotlib.__version__,
-        }
-    }
-    with open("versions.yml", "w") as f:
-        f.write(format_yaml_like(versions))
-
-
-if __name__ == "__main__":
-    prefix = "${task.ext.prefix ? task.ext.prefix : meta.id}"
-
-    sys.argv = [
-        "cluster_metrics.py",
-        "--features",
-        "$features",
-        "--clusters",
-        "$clusters",
-        "--k-min",
-        "2",
-        "--k-max",
-        "12",
-        "--out-k-sweep",
-        f"{prefix}_k_sweep.csv",
-        "--out-selected",
-        f"{prefix}_selected.json",
-        "--out-prefix",
-        prefix,
-    ]
-
-    main()
diff --git a/modules/nf-core/custom/clustermetrics/tests/data/test_clusters.csv b/modules/nf-core/custom/clustermetrics/tests/data/test_clusters.csv
deleted file mode 100644
index 1258849b8fbe..000000000000
--- a/modules/nf-core/custom/clustermetrics/tests/data/test_clusters.csv
+++ /dev/null
@@ -1,6 +0,0 @@
-sample_id,cluster
-sample01,0
-sample02,2
-sample03,1
-sample04,2
-sample05,1
diff --git a/modules/nf-core/custom/clustermetrics/tests/data/test_features.tsv b/modules/nf-core/custom/clustermetrics/tests/data/test_features.tsv
deleted file mode 100644
index 033d23b82df8..000000000000
--- a/modules/nf-core/custom/clustermetrics/tests/data/test_features.tsv
+++ /dev/null
@@ -1,6 +0,0 @@
-sample_id	PC1	PC2	PC3
-sample01	0.1234	0.5678	0.9012
-sample02	-0.2345	0.6789	-0.0123
-sample03	0.3456	-0.7890	0.1234
-sample04	-0.4567	0.8901	-0.2345
-sample05	0.5678	-0.9012	0.3456
diff --git a/modules/nf-core/custom/clustermetrics/tests/main.nf.test b/modules/nf-core/custom/clustermetrics/tests/main.nf.test
deleted file mode 100644
index 32f903d2fdeb..000000000000
--- a/modules/nf-core/custom/clustermetrics/tests/main.nf.test
+++ /dev/null
@@ -1,70 +0,0 @@
-nextflow_process {
-
-    name "Test Process CUSTOM_CLUSTERMETRICS"
-    script "../main.nf"
-    process "CUSTOM_CLUSTERMETRICS"
-
-    tag "modules"
-    tag "modules_nfcore"
-    tag "custom"
-    tag "custom/clustermetrics"
-
-    test("clustermetrics - features and clusters") {
-
-        when {
-            process {
-                """
-                input[0] = [
-                    [ id:'test' ],
-                    file("${projectDir}/modules/nf-core/custom/clustermetrics/tests/data/test_features.tsv", checkIfExists: true),
-                    file("${projectDir}/modules/nf-core/custom/clustermetrics/tests/data/test_clusters.csv", checkIfExists: true)
-                ]
-                """
-            }
-        }
-
-        then {
-            assertAll(
-                { assert process.success },
-                { assert snapshot(
-                    process.out.metrics,
-                    process.out.k_sweep,
-                    process.out.selected,
-                    process.out.versions,
-                    path(process.out.versions[0]).yaml
-                ).match() }
-            )
-        }
-    }
-
-    test("clustermetrics - features and clusters - stub") {
-
-        options "-stub"
-
-        when {
-            process {
-                """
-                input[0] = [
-                    [ id:'test' ],
-                    file("${projectDir}/modules/nf-core/custom/clustermetrics/tests/data/test_features.tsv", checkIfExists: true),
-                    file("${projectDir}/modules/nf-core/custom/clustermetrics/tests/data/test_clusters.csv", checkIfExists: true)
-                ]
-                """
-            }
-        }
-
-        then {
-            assertAll(
-                { assert process.success },
-                { assert snapshot(
-                    process.out.metrics,
-                    process.out.k_sweep,
-                    process.out.selected,
-                    process.out.plots,
-                    process.out.versions,
-                    path(process.out.versions[0]).yaml
-                ).match() }
-            )
-        }
-    }
-}
\ No newline at end of file
diff --git a/modules/nf-core/custom/clustermetrics/tests/main.nf.test.snap b/modules/nf-core/custom/clustermetrics/tests/main.nf.test.snap
deleted file mode 100644
index 789c38b0def7..000000000000
--- a/modules/nf-core/custom/clustermetrics/tests/main.nf.test.snap
+++ /dev/null
@@ -1,103 +0,0 @@
-{
-    "clustermetrics - features and clusters": {
-        "content": [
-            [
-                [
-                    {
-                        "id": "test"
-                    },
-                    "test_metrics.tsv:md5,7bfdac9bca90ba2ea3c03eca25b24f28"
-                ]
-            ],
-            [
-                [
-                    {
-                        "id": "test"
-                    },
-                    "test_k_sweep.csv:md5,8c382a00d959ae5d6e19f42cf9278f37"
-                ]
-            ],
-            [
-                [
-                    {
-                        "id": "test"
-                    },
-                    "test_selected.json:md5,633493b1585fb0ec0a81629bde4c00cb"
-                ]
-            ],
-            [
-                "versions.yml:md5,67cab9dfa6c955b0927cf3ff6fc8c5bd"
-            ],
-            {
-                "CUSTOM_CLUSTERMETRICS": {
-                    "python": "3.12.12",
-                    "pandas": "2.2.3",
-                    "scikit-learn": "1.6.1",
-                    "matplotlib": "3.9.4"
-                }
-            }
-        ],
-        "timestamp": "2026-05-13T16:49:11.628681612",
-        "meta": {
-            "nf-test": "0.9.5",
-            "nextflow": "25.09.0"
-        }
-    },
-    "clustermetrics - features and clusters - stub": {
-        "content": [
-            [
-                [
-                    {
-                        "id": "test"
-                    },
-                    "test_metrics.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
-                ]
-            ],
-            [
-                [
-                    {
-                        "id": "test"
-                    },
-                    "test_k_sweep.csv:md5,d41d8cd98f00b204e9800998ecf8427e"
-                ]
-            ],
-            [
-                [
-                    {
-                        "id": "test"
-                    },
-                    "test_selected.json:md5,d41d8cd98f00b204e9800998ecf8427e"
-                ]
-            ],
-            [
-                [
-                    {
-                        "id": "test"
-                    },
-                    [
-                        "test_calinski.png:md5,d41d8cd98f00b204e9800998ecf8427e",
-                        "test_davies_bouldin.png:md5,d41d8cd98f00b204e9800998ecf8427e",
-                        "test_elbow.png:md5,d41d8cd98f00b204e9800998ecf8427e",
-                        "test_silhouette.png:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ]
-            ],
-            [
-                "versions.yml:md5,939e81a1c6d66dde0edb847e3e61defd"
-            ],
-            {
-                "CUSTOM_CLUSTERMETRICS": {
-                    "python": "3.12.12",
-                    "pandas": "2.2.3",
-                    "scikit-learn": "1.6.1",
-                    "matplotlib": "3.9.4"
-                }
-            }
-        ],
-        "timestamp": "2026-05-13T16:49:29.799534772",
-        "meta": {
-            "nf-test": "0.9.5",
-            "nextflow": "25.09.0"
-        }
-    }
-}
\ No newline at end of file
diff --git a/modules/nf-core/custom/clustervisualization/environment.yml b/modules/nf-core/custom/clustervisualization/environment.yml
deleted file mode 100644
index 8addb3305878..000000000000
--- a/modules/nf-core/custom/clustervisualization/environment.yml
+++ /dev/null
@@ -1,14 +0,0 @@
-# clustervisualization/environment.yml
----
-# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
-channels:
-  - conda-forge
-  - bioconda
-dependencies:
-  - conda-forge::matplotlib=3.9.4
-  - conda-forge::numpy=2.4.2
-  - conda-forge::pandas=2.2.3
-  - conda-forge::python=3.12.12
-  - conda-forge::scikit-learn=1.6.1
-  - conda-forge::seaborn=0.13.2
-  - conda-forge::umap-learn=0.5.12
\ No newline at end of file
diff --git a/modules/nf-core/custom/clustervisualization/main.nf b/modules/nf-core/custom/clustervisualization/main.nf
deleted file mode 100644
index 7d9156a0cb14..000000000000
--- a/modules/nf-core/custom/clustervisualization/main.nf
+++ /dev/null
@@ -1,42 +0,0 @@
-process CUSTOM_CLUSTERVISUALIZATION {
-    tag "$meta.id"
-    label 'process_medium'
-    conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
-    'docker://community.wave.seqera.io/library/matplotlib_pandas_python_scikit-learn_pruned:054f91aaa56bd7d5' :
-    'community.wave.seqera.io/library/matplotlib_pandas_python_scikit-learn_pruned:054f91aaa56bd7d5' }"
-    input:
-    tuple val(meta), path(features), path(clusters)
-
-    output:
-    tuple val(meta), path("*.umap.tsv") , emit: umap_tsv
-    tuple val(meta), path("*.tsne.tsv") , emit: tsne_tsv
-    tuple val(meta), path("*.umap.png") , emit: umap_png, optional: true
-    tuple val(meta), path("*.tsne.png") , emit: tsne_png, optional: true
-    path "versions.yml"                 , emit: versions, topic: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    template 'cluster_viz.py'
-
-    stub:
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    """
-    touch ${prefix}.umap.tsv
-    touch ${prefix}.tsne.tsv
-    touch ${prefix}.umap.png
-    touch ${prefix}.tsne.png
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        python: \$(python3 --version | sed 's/Python //')
-        pandas: \$(python3 -c "import pandas; print(pandas.__version__)")
-        matplotlib: \$(python3 -c "import matplotlib; print(matplotlib.__version__)")
-        seaborn: \$(python3 -c "import seaborn; print(seaborn.__version__)")
-        umap-learn: \$(python3 -c "import umap; print(umap.__version__)")
-        scikit-learn: \$(python3 -c "import sklearn; print(sklearn.__version__)")
-    END_VERSIONS
-    """
-}
diff --git a/modules/nf-core/custom/clustervisualization/meta.yml b/modules/nf-core/custom/clustervisualization/meta.yml
deleted file mode 100644
index 0d90ab6fa53d..000000000000
--- a/modules/nf-core/custom/clustervisualization/meta.yml
+++ /dev/null
@@ -1,96 +0,0 @@
-name: "CUSTOM_CLUSTERVISUALIZATION"
-description: "Generates UMAP and t-SNE visualizations colored by cluster"
-keywords:
-  - clustering
-  - visualization
-  - pca
-  - umap
-  - tsne
-  - dimension-reduction
-tools:
-  - scikit-learn:
-      description: "Machine learning library for dimension reduction (PCA, t-SNE)"
-      homepage: "https://scikit-learn.org/"
-      documentation: "https://scikit-learn.org/stable/modules/clustering.html"
-      licence:
-        - "BSD-3-Clause"
-      identifier: ""
-  - umap-learn:
-      description: "Uniform Manifold Approximation and Projection for dimension reduction"
-      homepage: "https://umap-learn.readthedocs.io/"
-      documentation: "https://umap-learn.readthedocs.io/en/latest/"
-      licence:
-        - "BSD-3-Clause"
-      identifier: ""
-input:
-  - - meta:
-        type: map
-        description: "Groovy Map containing sample information"
-    - features:
-        type: file
-        description: "TSV file with sample_id and numeric features"
-        pattern: "*.tsv"
-        ontologies:
-          - edam: "http://edamontology.org/format_3475"
-    - clusters:
-        type: file
-        description: "CSV/TSV file with sample_id and cluster assignment"
-        pattern: "*_clusters.*"
-        ontologies: []
-output:
-  umap_tsv:
-    - - meta:
-          type: map
-          description: "Groovy Map containing sample information"
-      - "*.umap.tsv":
-          type: file
-          description: "UMAP coordinates per sample"
-          pattern: "*.umap.tsv"
-          ontologies:
-            - edam: "http://edamontology.org/operation_2432"
-            - edam: http://edamontology.org/format_3475
-  tsne_tsv:
-    - - meta:
-          type: map
-          description: "Groovy Map containing sample information"
-      - "*.tsne.tsv":
-          type: file
-          description: "t-SNE coordinates per sample"
-          pattern: "*.tsne.tsv"
-          ontologies:
-            - edam: "http://edamontology.org/operation_2432"
-            - edam: http://edamontology.org/format_3475
-  umap_png:
-    - - meta:
-          type: map
-          description: "Groovy Map containing sample information"
-      - "*.umap.png":
-          type: file
-          description: "UMAP visualization coloured by cluster"
-          pattern: "*.umap.png"
-          ontologies: []
-  tsne_png:
-    - - meta:
-          type: map
-          description: "Groovy Map containing sample information"
-      - "*.tsne.png":
-          type: file
-          description: "t-SNE visualization coloured by cluster"
-          pattern: "*.tsne.png"
-          ontologies: []
-  versions:
-    - versions.yml:
-        type: file
-        description: "Software versions used in the module"
-        pattern: "versions.yml"
-        ontologies:
-          - edam: http://edamontology.org/format_3750
-topics:
-  versions:
-    - versions.yml:
-        type: string
-        description: The name of the process
-authors:
-  - "@dbaku42"
-maintainers:
-  - "@dbaku42"
diff --git a/modules/nf-core/custom/clustervisualization/templates/cluster_viz.py b/modules/nf-core/custom/clustervisualization/templates/cluster_viz.py
deleted file mode 100644
index c571d16ac884..000000000000
--- a/modules/nf-core/custom/clustervisualization/templates/cluster_viz.py
+++ /dev/null
@@ -1,234 +0,0 @@
-#!/usr/bin/env python3
-
-import os
-
-# Fix numba + matplotlib in read-only Singularity container
-os.environ["NUMBA_CACHE_DIR"] = "/tmp"
-os.environ["MPLCONFIGDIR"] = "/tmp"
-
-import platform
-
-import matplotlib
-import matplotlib.pyplot as plt
-import numpy as np
-import pandas as pd
-import seaborn as sns
-import sklearn
-import umap as umap_module
-from sklearn.manifold import TSNE
-from umap import UMAP
-
-matplotlib.use("Agg")
-
-
-def format_yaml_like(data: dict, indent: int = 0) -> str:
-    """Formats a dictionary to a YAML-like string (nf-core standard)."""
-    yaml_str = ""
-    for key, value in data.items():
-        spaces = "  " * indent
-        if isinstance(value, dict):
-            yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}"
-        else:
-            yaml_str += f"{spaces}{key}: {value}\\n"
-    return yaml_str
-
-
-def _normalise_id_column(df: pd.DataFrame) -> pd.DataFrame:
-    df = df.copy()
-    df.columns = [str(c).lstrip("#") for c in df.columns]
-
-    cols_upper = {str(c).upper(): c for c in df.columns}
-
-    if "IID" in cols_upper:
-        iid_col = cols_upper["IID"]
-        dup_mask = df[iid_col].astype(str).str.upper().isin({"FID", "IID"})
-        if dup_mask.any():
-            df = df.loc[~dup_mask].copy().reset_index(drop=True)
-
-    cols_upper = {str(c).upper(): c for c in df.columns}
-
-    if "SAMPLE_ID" in cols_upper:
-        sample_col = cols_upper["SAMPLE_ID"]
-        if sample_col != "sample_id":
-            df = df.rename(columns={sample_col: "sample_id"})
-        return df
-
-    if "IID" in cols_upper:
-        iid_col = cols_upper["IID"]
-        iid_numeric = pd.to_numeric(df[iid_col], errors="coerce").notna().all()
-
-        if iid_numeric:
-            df = df.drop(columns=[iid_col])
-            if len(df.columns) == 0:
-                raise ValueError("Cannot infer sample_id after dropping numeric IID column")
-            df = df.rename(columns={df.columns[0]: "sample_id"})
-        else:
-            df = df.rename(columns={iid_col: "sample_id"})
-
-        fid_cols = [c for c in df.columns if str(c).upper() == "FID"]
-        if fid_cols:
-            df = df.drop(columns=fid_cols)
-
-        return df
-
-    raise ValueError(f"Cannot find sample ID column (expected 'sample_id' or 'IID'). Found: {list(df.columns)}")
-
-
-def load_features(path: str) -> tuple[pd.DataFrame, pd.Series]:
-    df = pd.read_csv(path, sep="\\t", dtype=str)
-    df = _normalise_id_column(df)
-
-    if "sample_id" not in df.columns:
-        raise ValueError("features file must contain a sample_id column after normalization")
-
-    sample_ids = df["sample_id"].astype(str)
-    x = df.drop(columns=["sample_id"]).apply(pd.to_numeric, errors="coerce")
-    x = x.fillna(x.mean(numeric_only=True))
-    x = x.fillna(0.0)
-
-    return x, sample_ids
-
-
-def load_clusters(path: str) -> tuple[pd.DataFrame, str]:
-    """Load clusters and return (df, mode). Same logic as cluster_metrics."""
-    df = pd.read_csv(path, sep=",", dtype=str)
-    df = df.copy()
-    df.columns = [str(c).lstrip("#") for c in df.columns]
-
-    cols_upper = {str(c).upper(): c for c in df.columns}
-
-    if "CLUSTER" not in cols_upper:
-        raise ValueError("clusters CSV must have a 'cluster' column")
-
-    cluster_col = cols_upper["CLUSTER"]
-
-    if "SAMPLE_ID" in cols_upper:
-        sample_col = cols_upper["SAMPLE_ID"]
-        out = df[[sample_col, cluster_col]].copy()
-        out.columns = ["sample_id", "cluster"]
-        out["sample_id"] = out["sample_id"].astype(str)
-        out["cluster"] = pd.to_numeric(out["cluster"], errors="raise").astype(int)
-        return out, "sample_id"
-
-    try:
-        norm = _normalise_id_column(df.copy())
-        if "sample_id" in norm.columns and "cluster" in norm.columns:
-            out = norm[["sample_id", "cluster"]].copy()
-            out["sample_id"] = out["sample_id"].astype(str)
-            out["cluster"] = pd.to_numeric(out["cluster"], errors="raise").astype(int)
-            return out, "sample_id"
-    except Exception:
-        pass
-
-    other_cols = [c for c in df.columns if c != cluster_col]
-
-    if len(other_cols) == 1:
-        candidate = other_cols[0]
-        candidate_vals = df[candidate].astype(str)
-
-        if not (
-            len(candidate_vals) > 0 and float(pd.to_numeric(candidate_vals, errors="coerce").notna().mean()) >= 0.8
-        ):
-            out = pd.DataFrame(
-                {
-                    "sample_id": candidate_vals,
-                    "cluster": pd.to_numeric(df[cluster_col], errors="raise").astype(int),
-                }
-            )
-            return out, "sample_id"
-
-    out = pd.DataFrame({"cluster": pd.to_numeric(df[cluster_col], errors="raise").astype(int)})
-    return out, "row_order"
-
-
-def plot_embedding(x: np.ndarray, labels: np.ndarray, method: str, prefix: str) -> None:
-    """Plot UMAP or t-SNE with cluster coloring."""
-    if method == "umap":
-        reducer = UMAP(random_state=42)
-        embedding = reducer.fit_transform(x)
-        title = "UMAP"
-        out_tsv = f"{prefix}.umap.tsv"
-        out_png = f"{prefix}.umap.png"
-    else:  # tsne
-        reducer = TSNE(n_components=2, random_state=42, perplexity=min(30, len(x) - 1))
-        embedding = reducer.fit_transform(x)
-        title = "t-SNE"
-        out_tsv = f"{prefix}.tsne.tsv"
-        out_png = f"{prefix}.tsne.png"
-
-    # Save embedding
-    emb_df = pd.DataFrame(embedding, columns=["Dim1", "Dim2"])
-    emb_df["cluster"] = labels
-    emb_df.to_csv(out_tsv, sep="\\t", index=False)
-
-    # Plot
-    plt.figure(figsize=(8, 6))
-    palette = sns.color_palette("tab10", n_colors=len(np.unique(labels)))
-    sns.scatterplot(
-        x=embedding[:, 0],
-        y=embedding[:, 1],
-        hue=labels.astype(str),
-        palette=palette,
-        alpha=0.8,
-        s=60,
-        edgecolor="k",
-        linewidth=0.3,
-    )
-    plt.title(f"{title} projection of features colored by cluster")
-    plt.xlabel(f"{title} 1")
-    plt.ylabel(f"{title} 2")
-    plt.legend(title="Cluster", bbox_to_anchor=(1.05, 1), loc="upper left")
-    plt.tight_layout()
-    plt.savefig(out_png, dpi=200, bbox_inches="tight")
-    plt.close()
-
-
-def main() -> None:
-    features = "$features"
-    clusters_path = "$clusters"
-    prefix = "${task.ext.prefix ?: meta.id}"
-
-    x_df, sample_ids = load_features(features)
-    clusters_df, cluster_mode = load_clusters(clusters_path)
-
-    if cluster_mode == "sample_id":
-        clusters = clusters_df.set_index("sample_id")["cluster"]
-        common = sample_ids[sample_ids.isin(clusters.index)]
-        if len(common) > 0:
-            x = x_df.loc[common.index].values
-            labels = clusters.loc[common.values].values
-        elif len(clusters_df) == len(sample_ids):
-            x = x_df.values
-            labels = clusters_df["cluster"].values
-        else:
-            raise ValueError("No overlapping sample_id between features and clusters")
-    else:
-        if len(clusters_df) != len(sample_ids):
-            raise ValueError("Row counts do not match and no sample_id column found")
-        x = x_df.values
-        labels = clusters_df["cluster"].values
-
-    if len(x) < 2:
-        raise ValueError("Need at least 2 samples for embedding")
-
-    # Generate both embeddings
-    plot_embedding(x, labels, "umap", prefix)
-    plot_embedding(x, labels, "tsne", prefix)
-
-    # versions.yml
-    versions = {
-        "${task.process}": {
-            "python": platform.python_version(),
-            "pandas": pd.__version__,
-            "matplotlib": matplotlib.__version__,
-            "seaborn": sns.__version__,
-            "umap-learn": umap_module.__version__,
-            "scikit-learn": sklearn.__version__,
-        }
-    }
-    with open("versions.yml", "w") as f:
-        f.write(format_yaml_like(versions))
-
-
-if __name__ == "__main__":
-    main()
diff --git a/modules/nf-core/custom/clustervisualization/tests/data/test_clusters.csv b/modules/nf-core/custom/clustervisualization/tests/data/test_clusters.csv
deleted file mode 100644
index 1258849b8fbe..000000000000
--- a/modules/nf-core/custom/clustervisualization/tests/data/test_clusters.csv
+++ /dev/null
@@ -1,6 +0,0 @@
-sample_id,cluster
-sample01,0
-sample02,2
-sample03,1
-sample04,2
-sample05,1
diff --git a/modules/nf-core/custom/clustervisualization/tests/data/test_features.tsv b/modules/nf-core/custom/clustervisualization/tests/data/test_features.tsv
deleted file mode 100644
index 033d23b82df8..000000000000
--- a/modules/nf-core/custom/clustervisualization/tests/data/test_features.tsv
+++ /dev/null
@@ -1,6 +0,0 @@
-sample_id	PC1	PC2	PC3
-sample01	0.1234	0.5678	0.9012
-sample02	-0.2345	0.6789	-0.0123
-sample03	0.3456	-0.7890	0.1234
-sample04	-0.4567	0.8901	-0.2345
-sample05	0.5678	-0.9012	0.3456
diff --git a/modules/nf-core/custom/clustervisualization/tests/data/test_pca.eigenvec b/modules/nf-core/custom/clustervisualization/tests/data/test_pca.eigenvec
deleted file mode 100644
index 61aae5d8b413..000000000000
--- a/modules/nf-core/custom/clustervisualization/tests/data/test_pca.eigenvec
+++ /dev/null
@@ -1,6 +0,0 @@
-#FID IID PC1 PC2 PC3
-0 sample01 0.1234 0.5678 0.9012
-0 sample02 -0.2345 0.6789 -0.0123
-0 sample03 0.3456 -0.7890 0.1234
-0 sample04 -0.4567 0.8901 -0.2345
-0 sample05 0.5678 -0.9012 0.3456
diff --git a/modules/nf-core/custom/clustervisualization/tests/main.nf.test b/modules/nf-core/custom/clustervisualization/tests/main.nf.test
deleted file mode 100644
index 1256c87895f0..000000000000
--- a/modules/nf-core/custom/clustervisualization/tests/main.nf.test
+++ /dev/null
@@ -1,69 +0,0 @@
-nextflow_process {
-
-    name "Test Process CUSTOM_CLUSTERVISUALIZATION"
-    script "../main.nf"
-    process "CUSTOM_CLUSTERVISUALIZATION"
-
-    tag "modules"
-    tag "modules_nfcore"
-    tag "custom"
-    tag "custom/clustervisualization"
-
-    test("clustervisualization - features and clusters") {
-
-        when {
-            process {
-                """
-                input[0] = [
-                    [ id:'test' ],
-                    file("${projectDir}/modules/nf-core/custom/clustervisualization/tests/data/test_features.tsv", checkIfExists: true),
-                    file("${projectDir}/modules/nf-core/custom/clustervisualization/tests/data/test_clusters.csv", checkIfExists: true)
-                ]
-                """
-            }
-        }
-
-        then {
-            assertAll(
-                { assert process.success },
-                { assert snapshot(
-                    process.out.umap_tsv,
-                    process.out.tsne_tsv,
-                    process.out.versions,
-                    path(process.out.versions[0]).yaml
-                ).match() }
-            )
-        }
-    }
-
-    test("clustervisualization - features and clusters - stub") {
-
-        options "-stub"
-
-        when {
-            process {
-                """
-                input[0] = [
-                    [ id:'test' ],
-                    file("${projectDir}/modules/nf-core/custom/clustervisualization/tests/data/test_features.tsv", checkIfExists: true),
-                    file("${projectDir}/modules/nf-core/custom/clustervisualization/tests/data/test_clusters.csv", checkIfExists: true)
-                ]
-                """
-            }
-        }
-
-        then {
-            assertAll(
-                { assert process.success },
-                { assert snapshot(
-                    process.out.umap_tsv,
-                    process.out.tsne_tsv,
-                    process.out.umap_png,
-                    process.out.tsne_png,
-                    process.out.versions,
-                    path(process.out.versions[0]).yaml
-                ).match() }
-            )
-        }
-    }
-}
\ No newline at end of file
diff --git a/modules/nf-core/custom/clustervisualization/tests/main.nf.test.snap b/modules/nf-core/custom/clustervisualization/tests/main.nf.test.snap
deleted file mode 100644
index 03d8e5832d23..000000000000
--- a/modules/nf-core/custom/clustervisualization/tests/main.nf.test.snap
+++ /dev/null
@@ -1,94 +0,0 @@
-{
-    "clustervisualization - features and clusters - stub": {
-        "content": [
-            [
-                [
-                    {
-                        "id": "test"
-                    },
-                    "test.umap.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
-                ]
-            ],
-            [
-                [
-                    {
-                        "id": "test"
-                    },
-                    "test.tsne.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
-                ]
-            ],
-            [
-                [
-                    {
-                        "id": "test"
-                    },
-                    "test.umap.png:md5,d41d8cd98f00b204e9800998ecf8427e"
-                ]
-            ],
-            [
-                [
-                    {
-                        "id": "test"
-                    },
-                    "test.tsne.png:md5,d41d8cd98f00b204e9800998ecf8427e"
-                ]
-            ],
-            [
-                "versions.yml:md5,f79a1469aa94553d8d58281262a9d76b"
-            ],
-            {
-                "CUSTOM_CLUSTERVISUALIZATION": {
-                    "python": "3.12.12",
-                    "pandas": "2.2.3",
-                    "matplotlib": "3.9.4",
-                    "seaborn": "0.13.2",
-                    "umap-learn": "0.5.12",
-                    "scikit-learn": "1.6.1"
-                }
-            }
-        ],
-        "timestamp": "2026-05-13T16:50:23.618656008",
-        "meta": {
-            "nf-test": "0.9.5",
-            "nextflow": "25.09.0"
-        }
-    },
-    "clustervisualization - features and clusters": {
-        "content": [
-            [
-                [
-                    {
-                        "id": "test"
-                    },
-                    "test.umap.tsv:md5,50c3bb50b36a174c55dd45201e9c0036"
-                ]
-            ],
-            [
-                [
-                    {
-                        "id": "test"
-                    },
-                    "test.tsne.tsv:md5,738a97587fa8c72614d2655eddbd2f7a"
-                ]
-            ],
-            [
-                "versions.yml:md5,c9099db9969c17be8e1f69dfd9ed925a"
-            ],
-            {
-                "CUSTOM_CLUSTERVISUALIZATION": {
-                    "python": "3.12.12",
-                    "pandas": "2.2.3",
-                    "matplotlib": "3.9.4",
-                    "seaborn": "0.13.2",
-                    "umap-learn": "0.5.12",
-                    "scikit-learn": "1.6.1"
-                }
-            }
-        ],
-        "timestamp": "2026-05-13T16:49:58.200828019",
-        "meta": {
-            "nf-test": "0.9.5",
-            "nextflow": "25.09.0"
-        }
-    }
-}
\ No newline at end of file

From 280aac970bdb6ccf7e7cd1e30d3c41a837b85ba2 Mon Sep 17 00:00:00 2001
From: dbaku42 <dbaku42@gmail.com>
Date: Wed, 13 May 2026 22:26:54 +0200
Subject: [PATCH 37/38] test: update clustering snapshot after pyyaml addition

---
 modules/nf-core/custom/clustering/tests/main.nf.test.snap | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/nf-core/custom/clustering/tests/main.nf.test.snap b/modules/nf-core/custom/clustering/tests/main.nf.test.snap
index b852b800bcb4..8a29091d57a2 100644
--- a/modules/nf-core/custom/clustering/tests/main.nf.test.snap
+++ b/modules/nf-core/custom/clustering/tests/main.nf.test.snap
@@ -63,14 +63,14 @@
                     {
                         "id": "test"
                     },
-                    "test_clustering_info.json:md5,6e61eece1d6cad24489312531115e55a"
+                    "test_clustering_info.json:md5,c4cb7430071a48a117eae03f66e654ed"
                 ]
             ],
             [
                 "versions.yml:md5,a5f57bd446ec1ba732607243bebd93fc"
             ]
         ],
-        "timestamp": "2026-05-13T18:53:15.637280975",
+        "timestamp": "2026-05-13T22:26:38.454903789",
         "meta": {
             "nf-test": "0.9.5",
             "nextflow": "25.09.0"

From 9d98375e7b8584de8f739c31f76e20aab92e0d28 Mon Sep 17 00:00:00 2001
From: Donald Baku <141358602+dbaku42@users.noreply.github.com>
Date: Thu, 14 May 2026 12:04:22 +0200
Subject: [PATCH 38/38] Add pyyaml version 6.0.2 to environment.yml

---
 modules/nf-core/custom/clustering/environment.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modules/nf-core/custom/clustering/environment.yml b/modules/nf-core/custom/clustering/environment.yml
index 4b7a89234526..6dd100648188 100644
--- a/modules/nf-core/custom/clustering/environment.yml
+++ b/modules/nf-core/custom/clustering/environment.yml
@@ -12,3 +12,4 @@ dependencies:
   - conda-forge::scikit-learn=1.6.1
   - conda-forge::seaborn=0.13.2
   - conda-forge::umap-learn=0.5.12
+  - conda-forge::pyyaml=6.0.2