GreenleafLab · vinsfan368 · Dec 3, 2025 · Dec 3, 2025 · Dec 3, 2025 · Dec 3, 2025
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -0,0 +1,57 @@
+name: CI  # unit tests plus methyltransferase/deaminase pipeline smoke tests
+
+on:  # no filters: runs for every push and every pull request
+  push:
+  pull_request:
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+
+    defaults:
+      run:
+        shell: bash -el {0}  # -l: login shell so conda init works; -e: fail on first error
+
+    steps:
+      - name: Check out repo
+        uses: actions/checkout@v4
+
+      - name: Set up Conda (Miniforge3)
+        uses: conda-incubator/setup-miniconda@v3
+        with:
+          miniforge-variant: Miniforge3
+          python-version: "3.12"  # quoted so YAML keeps it a string
+          auto-update-conda: false
+          activate-environment: smf_snakemake  # env activated in later login-shell steps
+          use-mamba: true
+      - name: Configure conda with strict channel priority
+        run: |
+          conda config --set channel_priority strict
+
+      - name: Install Snakemake + basic deps
+        run: |
+          mamba install -y -c conda-forge -c bioconda "snakemake>=9" pandas
+
+      - name: Show conda envs
+        run: conda info --envs
+
+      - name: Run unit tests via Snakemake
+        working-directory: tests  # rule targets below are defined in tests/Snakefile
+        run: |
+          snakemake unit_tests \
+            --cores 1 \
+            --use-conda
+
+      - name: Run methyltransferase pipeline test
+        working-directory: tests
+        run: |
+          snakemake test_methyltransferase_pipeline \
+            --cores 2 \
+            --use-conda
+
+      - name: Run deaminase pipeline test
+        working-directory: tests
+        run: |
+          snakemake test_deaminase_pipeline \
+            --cores 2 \
+            --use-conda
diff --git a/config/samples.tsv b/config/samples.tsv
diff --git a/example_files/deaminase1_S1_R1_001.fastq.gz b/example_files/deaminase1_S1_R1_001.fastq.gz
diff --git a/example_files/deaminase1_S1_R2_001.fastq.gz b/example_files/deaminase1_S1_R2_001.fastq.gz
diff --git a/example_files/deaminase2_S2_R1_001.fastq.gz b/example_files/deaminase2_S2_R1_001.fastq.gz
diff --git a/example_files/deaminase2_S2_R2_001.fastq.gz b/example_files/deaminase2_S2_R2_001.fastq.gz
diff --git a/config/config.yaml → example_files/deaminase_config.yaml b/config/config.yaml → example_files/deaminase_config.yaml
@@ -1,4 +1,9 @@
 # This file should contain everything to configure the workflow on a global scale.
 # In case of sample based data, it should be complemented by a samples.tsv file that contains
 # one row per sample. It can be parsed easily via pandas.
-samples: "amplicon-smf/config/samples.tsv"
+
+samples: "example_files/deaminase_samplesheet.tsv"
+alignment_score_fraction: 0.8
+alignment_length_fraction: 0.8
+read1_length: 200
+read2_length: 200
diff --git a/example_files/deaminase_samplesheet.tsv b/example_files/deaminase_samplesheet.tsv
@@ -0,0 +1,3 @@
+sample_name	fastq_R1	fastq_R2	amplicon_fa	experiment	filter_contigs	no_endog_meth	ignore_bounds	deaminase
+deaminase1	example_files/deaminase1_S1_R1_001.fastq.gz	example_files/deaminase1_S1_R2_001.fastq.gz	example_files/opJS45.amplicon.fa	deaminase_test	FALSE	TRUE	FALSE	TRUE
+deaminase2	example_files/deaminase2_S2_R1_001.fastq.gz	example_files/deaminase2_S2_R2_001.fastq.gz	example_files/opJS45.amplicon.fa	deaminase_test	FALSE	TRUE	FALSE	TRUE
diff --git a/example_files/example_samplesheet.txt b/example_files/example_samplesheet.txt
diff --git a/example_files/example_config.yaml → example_files/methyltransferase_config.yaml b/example_files/example_config.yaml → example_files/methyltransferase_config.yaml
@@ -1,7 +1,8 @@
 # This file should contain everything to configure the workflow on a global scale.
 # In case of sample based data, it should be complemented by a samples.tsv file that contains
 # one row per sample. It can be parsed easily via pandas.
-samples: "amplicon-smf/example_files/example_samplesheet.txt"
+
+samples: "example_files/methyltransferase_samplesheet.tsv"
 alignment_score_fraction: 0.8
 alignment_length_fraction: 0.8
 read1_length: 235

diff --git a/example_files/example_fastq_R1_001.fastq.gz → ...s/methyltransferase_fastq_R1_001.fastq.gz b/example_files/example_fastq_R1_001.fastq.gz → ...s/methyltransferase_fastq_R1_001.fastq.gz
diff --git a/example_files/example_fastq_R2_001.fastq.gz → ...s/methyltransferase_fastq_R2_001.fastq.gz b/example_files/example_fastq_R2_001.fastq.gz → ...s/methyltransferase_fastq_R2_001.fastq.gz
diff --git a/example_files/methyltransferase_samplesheet.tsv b/example_files/methyltransferase_samplesheet.tsv
@@ -0,0 +1,2 @@
+sample_name	fastq_R1	fastq_R2	amplicon_fa	experiment	filter_contigs	include_cpg	ignore_bounds
+methyltransferase	example_files/methyltransferase_fastq_R1_001.fastq.gz	example_files/methyltransferase_fastq_R2_001.fastq.gz	example_files/opJS45.amplicon.long.fa	methyltransferase_test	TRUE	FALSE	FALSE
diff --git a/example_files/opJS45.amplicon.fa b/example_files/opJS45.amplicon.fa
diff --git a/example_files/opJS45.amplicon.long.fa b/example_files/opJS45.amplicon.long.fa
diff --git a/tests/Snakefile b/tests/Snakefile
@@ -1,8 +1,121 @@
-# Run tests with `snakemake --cores 1 --use-conda tests` in the tests directory
+"""
+Snakemake tests for key functions and smoke tests
+on small methyltransferase and deaminase data. 
 
-rule tests:
-    message: "Running unit tests..."
-    conda: "../workflow/rules/envs/smf_py3_v7.yaml"
+In the tests directory:
+- run unit tests with 
+  `snakemake unit_tests -c 1 --use-conda`
+- run pipeline tests with 
+  `snakemake test_methyltransferase_pipeline -c 1 --forceall`
+  `snakemake test_deaminase_pipeline -c 1 --forceall`
+
+Previous pipeline test results will be deleted if any test is run!
+"""
+import os
+import pandas as pd
+from snakemake.common.configfile import load_configfile
+
+# Absolute paths to repo folders, all derived from this Snakefile's directory
+TESTS_DIR = workflow.basedir
+REPO_ROOT = os.path.abspath(os.path.join(TESTS_DIR, ".."))
+EXAMPLES_DIR = os.path.join(REPO_ROOT, "example_files")
+WORKFLOW_DIR = os.path.join(REPO_ROOT, "workflow")
+RESULTS_DIR = os.path.join(REPO_ROOT, "results")
+
+# For each test config: read its samplesheet, require exactly one experiment, and
+# use the first sample's stats file as the marker that the pipeline ran through
+METH_CONFIG = os.path.join(EXAMPLES_DIR, "methyltransferase_config.yaml")
+_meth_cfg   = load_configfile(METH_CONFIG)
+_meth_samplesheet = _meth_cfg["samples"]
+if not os.path.isabs(_meth_samplesheet):  # config paths are relative to the repo root
+    _meth_samplesheet = os.path.join(REPO_ROOT, _meth_samplesheet)
+_meth_df = pd.read_csv(_meth_samplesheet, sep="\t")
+_meth_experiments = sorted(_meth_df["experiment"].unique())
+if len(_meth_experiments) != 1:
+    raise ValueError(f"Expected exactly one experiment in methyl samplesheet, got {_meth_experiments}")
+METH_SAMPLE = _meth_df.loc[0, "sample_name"]  # first row of the samplesheet
+METH_EXPERIMENT = _meth_experiments[0]
+METH_TEST_OUTPUT = os.path.join(RESULTS_DIR,
+                                METH_EXPERIMENT,
+                                METH_SAMPLE,
+                                "stats",
+                                f"{METH_SAMPLE}.nuc_len_qc.stats.txt")  # named after the sample, as for DEAM
+
+DEAM_CONFIG = os.path.join(EXAMPLES_DIR, "deaminase_config.yaml")
+_deam_cfg   = load_configfile(DEAM_CONFIG)
+_deam_samplesheet = _deam_cfg["samples"]
+if not os.path.isabs(_deam_samplesheet):  # config paths are relative to the repo root
+    _deam_samplesheet = os.path.join(REPO_ROOT, _deam_samplesheet)
+_deam_df = pd.read_csv(_deam_samplesheet, sep="\t")
+_deam_experiments = sorted(_deam_df["experiment"].unique())
+if len(_deam_experiments) != 1:
+    raise ValueError(f"Expected exactly one experiment in deamination samplesheet, got {_deam_experiments}")
+DEAM_SAMPLE = _deam_df.loc[0, "sample_name"]  # first row of the samplesheet
+DEAM_EXPERIMENT = _deam_experiments[0]
+DEAM_TEST_OUTPUT = os.path.join(RESULTS_DIR,
+                                DEAM_EXPERIMENT,
+                                DEAM_SAMPLE,
+                                "stats",
+                                f"{DEAM_SAMPLE}.nuc_len_qc.stats.txt")
+
+
+rule test_methyltransferase_pipeline:
+    message:
+        "Running methyltransferase pipeline with test data..."
+    output:
+        stats = METH_TEST_OUTPUT
+    params:
+        repo_root = REPO_ROOT,
+        config_file = METH_CONFIG,
+    threads:
+        workflow.cores
+    run:
+        # Run the main workflow as a nested Snakemake call from the repo root; :q shell-quotes the paths
+        shell(r"""
+            cd {params.repo_root:q}
+            snakemake \
+              -s workflow/Snakefile \
+              -w 15 \
+              --configfile {params.config_file:q} \
+              --cores {threads} \
+              --use-conda \
+              --forceall
+        """)
+        # Fail with an explicit message if the nested run did not produce the stats file
+        assert os.path.exists(output.stats), f"Missing stats file: {output.stats}"
+        # TODO: put file content checks here
+
+rule test_deaminase_pipeline:
+    message:
+        "Running deaminase pipeline with test data..."
+    output:
+        stats = DEAM_TEST_OUTPUT
+    params:
+        repo_root = REPO_ROOT,
+        config_file = DEAM_CONFIG,
+    threads:
+        workflow.cores
+    run:
+        # Run the main workflow as a nested Snakemake call from the repo root; :q shell-quotes the paths
+        shell(r"""
+            cd {params.repo_root:q}
+            snakemake \
+              -s workflow/Snakefile \
+              -w 15 \
+              --configfile {params.config_file:q} \
+              --cores {threads} \
+              --use-conda \
+              --forceall
+        """)
+        # Fail with an explicit message if the nested run did not produce the stats file
+        assert os.path.exists(output.stats), f"Missing stats file: {output.stats}"
+        # TODO: put file content checks here
+
+rule unit_tests:
+    message: 
+        "Running unit tests..."
+    conda:
+        os.path.join(REPO_ROOT, "workflow", "rules", "envs", "python3_v7.yaml")
     shell:
         r"""
         PYTHONPATH=../workflow/scripts \

diff --git a/workflow/Snakefile b/workflow/Snakefile
@@ -13,7 +13,4 @@ include: "rules/other.smk"
 
 rule all:
     input:
-        all_input
-
-
-
+        all_input
diff --git a/workflow/rules/envs/smf_py3_v7.yaml → workflow/rules/envs/python3_v7.yaml b/workflow/rules/envs/smf_py3_v7.yaml → workflow/rules/envs/python3_v7.yaml
@@ -7,17 +7,15 @@ channels:
 dependencies:
   - biopython
   - bwa
-  - fastcluster
-  - htslib
   - matplotlib
   - methyldackel
   - numpy
   - pandas
   - pysam
-  - python
+  - python=3.13
   - samtools
   - scikit-learn
   - scipy
   - seaborn
   - toolshed
-  - fastqc
+  - fastqc
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		sample_name fastq_R1 fastq_R2 amplicon_fa experiment filter_contigs include_cpg ignore_bounds
		methyltransferase example_files/methyltransferase_fastq_R1_001.fastq.gz example_files/methyltransferase_fastq_R2_001.fastq.gz example_files/opJS45.amplicon.long.fa methyltransferase_test TRUE FALSE FALSE