CCBR · kelly-sovacool · Jul 17, 2025 · Jul 17, 2025 · Jul 17, 2025
diff --git a/...orkflows/autoassig_milestone_to_issue.yml → .github/workflows/assign-milestone-issue.yml b/...orkflows/autoassig_milestone_to_issue.yml → .github/workflows/assign-milestone-issue.yml
diff --git a/.github/workflows/auto-format.yml b/.github/workflows/auto-format.yml
@@ -0,0 +1,54 @@
+# Run the repository's pre-commit hooks and push any resulting changes back
+# to the branch that triggered the workflow.
+name: auto-format
+
+on:
+  workflow_dispatch:
+  pull_request:
+
+env:
+  GH_TOKEN: ${{ github.token }}
+
+# contents: write lets the final step push the formatting commit.
+permissions:
+  contents: write
+  pull-requests: write
+
+jobs:
+  auto-format:
+    runs-on: ubuntu-latest
+
+    steps:
+      # For pull requests, check out the head branch by name instead of the
+      # default merge ref, so the later `git push` has a branch to update.
+      # NOTE(review): the branch name is resolved in this repository, so PRs
+      # from forks are probably not supported — confirm.
+      - uses: actions/checkout@v4
+        if: github.event_name == 'pull_request'
+        with:
+          fetch-depth: 0
+          ref: ${{ github.event.pull_request.head.ref }}
+      # For manual runs, use the action's default ref.
+      - uses: actions/checkout@v4
+        if: github.event_name != 'pull_request'
+        with:
+          fetch-depth: 0
+
+      # pre-commit exits non-zero when a hook rewrites files; continue anyway
+      # so the rewritten files can be committed by the next step.
+      - name: format
+        uses: pre-commit/action@v3.0.1
+        continue-on-error: true
+      - name: commit & push
+        run: |
+          git config --global user.name "github-actions[bot]"
+          git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          git add .
+          # An empty index is the only "nothing to commit" case. A failed
+          # commit or push must fail the step rather than be reported as a no-op.
+          if git diff --cached --quiet; then
+            echo "nothing to commit"
+          else
+            git commit -m "ci: 🤖 format everything with pre-commit"
+            git push
+          fi
diff --git a/.github/workflows/auto_add_reponame_labels.yml b/.github/workflows/auto_add_reponame_labels.yml
diff --git a/.github/workflows/draft-release.yml b/.github/workflows/draft-release.yml
@@ -14,6 +14,7 @@ on:
 
 permissions:
   contents: write
+  pull-requests: write
   actions: write
 
 jobs:
@@ -23,7 +24,7 @@ jobs:
       - uses: actions/checkout@v4
         with:
           fetch-depth: 0 # required to include tags
-      - uses: CCBR/actions/draft-release@main
+      - uses: CCBR/actions/draft-release@v0.2
         with:
           github-token: ${{ github.token }}
           version-tag: ${{ github.event.inputs.version-tag }}
diff --git a/.github/workflows/label-issues-repo-name.yml b/.github/workflows/label-issues-repo-name.yml
@@ -0,0 +1,21 @@
+name: label-issues-repo-name
+
+on:
+  issues:
+    types:
+      - opened  # run only when an issue is first opened
+  pull_request:
+    types:
+      - opened  # run only when a pull request is first opened
+
+permissions:
+  issues: write
+  pull-requests: write
+
+jobs:
+  add-label:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: CCBR/actions/label-issue-repo-name@main  # NOTE(review): tracks @main while the release workflows in this change pin CCBR/actions to @v0.2 — confirm whether to pin
+        with:
+          github-token: ${{ github.token }}
diff --git a/.github/workflows/post-release.yml b/.github/workflows/post-release.yml
@@ -8,7 +8,7 @@ on:
 permissions:
   contents: write
   pull-requests: write
-  issues: write
+  actions: write
 
 jobs:
   cleanup:
@@ -17,6 +17,6 @@ jobs:
       - uses: actions/checkout@v4
         with:
           fetch-depth: 0
-      - uses: CCBR/actions/post-release@main
+      - uses: CCBR/actions/post-release@v0.2
         with:
           github-token: ${{ github.token }}
diff --git a/.test/README.md b/.test/README.md
@@ -1,7 +1,8 @@
 ## Test data
+
 2 mouse samples:
 
-* iCre_D0
-* D4_Meso_iCre_Dox
+- iCre_D0
+- D4_Meso_iCre_Dox
 
-Each sample has 2 replicates. Each replicate has been subsampled to only include reads aligning to chr19:10,000,000-20,000,000
+Each sample has 2 replicates. Each replicate has been subsampled to only include reads aligning to chr19:10,000,000-20,000,000
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 # ASPEN
 
-**A**tac **S**eq **P**ip**E**li**N**e : 
+**A**tac **S**eq **P**ip**E**li**N**e :
 
 [CCBR](https://bioinformatics.ccr.cancer.gov/ccbr/) recommends ASPEN to effectively analyze ATAC-seq datasets on the [BIOWULF](https://hpc.nih.gov) HPC system at the [NIH](https://www.nih.gov/).
 
@@ -12,6 +12,7 @@
 ```bash
 module load ccbrpipeliner/7
 ```
+
 ```bash
 aspen --help
 ```

diff --git a/config/README.md b/config/README.md
@@ -1,5 +1,6 @@
 ## Config files
-* `config.yaml`: sets all the runtime configurations for the pipeline like output folder, resources folder, genome, tool-parameters etc.
-* `samples.tsv`: tab delimited sample manifest. Defaults to the samples in the .test folder of the pipeline
-* `multiqc_atacseq_config.yaml`: custom-multiqc config file for report generation
-* `create_test_config_sample_manifest_files.bash`: replicates variables `PIPELINE_HOME` and `WORKDIR` etc. to create sample manifest for testing purposes
+
+- `config.yaml`: sets all the runtime configurations for the pipeline like output folder, resources folder, genome, tool-parameters etc.
+- `samples.tsv`: tab delimited sample manifest. Defaults to the samples in the .test folder of the pipeline
+- `multiqc_atacseq_config.yaml`: custom-multiqc config file for report generation
+- `create_test_config_sample_manifest_files.bash`: replicates variables `PIPELINE_HOME` and `WORKDIR` etc. to create sample manifest for testing purposes
diff --git a/config/multiqc_atacseq_config.yaml b/config/multiqc_atacseq_config.yaml
@@ -1,115 +1,115 @@
 custom_data:
-    Nreads:
-        section_name: 'Nreads'
-        description: 'Number of reads per replicate. Mitochondrial fraction should be < 10%.'
-    nrf_stats:
-        file_format: 'tsv'
-        section_name: 'NRF PBC Stats'
-        description: '
-            Non-redundant Fraction (NRF): Number of distinct uniquely mapping reads (i.e. after removing duplicates) / Total number of reads. 
-            PCR Bottlenecking Coefficient 1 (PBC1): (number of genomic locations where exactly one read maps uniquely) / (number of distinct genomic locations to which some read maps uniquely). 
-            PCR Bottlenecking Coefficient 2 (PBC2): (number of genomic locations where only one read maps uniquely) / (number of genomic locations where two reads map uniquely). 
-            The preferred values are as follows: NRF>0.9, PBC1>0.9, and PBC2>3. '
-        plot_type: 'table'
-        pconfig:
-            id: 'NRF Stats'
-            title: 'NRF Stats table'
-    fld_files:
-        file_format: 'tsv'
-        section_name: 'Fragment Length Distribution'
-        description: 'Per sample FLD. Peak < 150bp = Nucleosome Free Peak, Peak between 150-300 bp = Mononucleosome Peak, Peak > 300bp = Dinucleosome Peak'
-        plot_type: 'linegraph'
-        pconfig:
-            id: 'FLD'
-            title: 'Fragment Length Distribution'
-            ylab: 'Normalized read density X 1e3'
-            xlab: 'Fragment Length'
-            xmax: 1000
-    fld_stats_peaks:
-        file_format: 'tsv'
-        section_name: 'FLD Stats (Peaks)'
-        description: 'Presence or Absense of nucleosome-free, mono and di-nucleosome peaks in FLD. A nucleosome free region (NFR) must be present. A mononucleosome peak must be present.'
-        plot_type: 'table'
-        pconfig:
-            id: 'FLD Stats'
-            title: 'FLD Stats table'
-    fld_stats_details:
-        file_format: 'tsv'
-        section_name: 'FLD Stats (Fractions and Ratios)'
-        description: 'Fractions and Ratios of interest'
-        plot_type: 'table'
-        pconfig:
-            id: 'FLD Stats'
-            title: 'FLD Stats table'
-    MACS2_Peak_Annotations:
-        section_name: 'MACS2 Peaks'
-        description: 'Peaks called using MACS2. For human or mouse data, Npeaks should be >150,000, though values >100,000 may be acceptable. '
-    Genrich_Peak_Annotations:
-        section_name: 'Genrich Peaks'
-        description: 'Peaks called using Genrich. For human or mouse data, Npeaks should be >150,000, though values >100,000 may be acceptable. '
-    peak_width_files:
-        file_format: 'tsv'
-        section_name: 'Peak width distribution'
-        description: 'Peak width distribution of consensus peaks.'
-        plot_type: 'linegraph'
-        pconfig:
-            id: 'PWD'
-            title: 'Peak Width Distribution'
-            ylab: 'Peak Density Percentage'
-            xlab: 'Peak Width'
-            xmax: 20000
-    frip_stats:
-        file_format: 'tsv'
-        section_name: 'FRiP Stats'
-        description: 'The fraction of reads in called peak regions (FRiP score) should be >0.3, though values greater than 0.2 are acceptable. Fraction of Reads in Peaks/DHS/Enhancers/Promoters are also reported. For human or mouse data, FRiPromoters is 12-20%'
-        plot_type: 'table'
-        pconfig:
-            id: 'FRiP Stats'
-            title: 'FRiP Stats table'
-    tss_files:
-        file_format: 'tsv'
-        section_name: 'TSS distribution'
-        description: 'Greenleaf Normalized TSS per sample distribution'
-        plot_type: 'linegraph'
-        pconfig:
-            id: 'TSS Enrichment'
-            title: 'TSS Enrichment Distribution'
-            ylab: 'Greenleaf Normalized TSS Enrichment'
-            xlab: 'Distance from TSS'
-    tss_knicking_sites_files:
-        file_format: 'tsv'
-        section_name: 'TSS Score Scatter'
-        description: 'TSS score to TSS with >20 Tn5knicking sites scatter. '
-        plot_type: 'scatter'
-        pconfig:
-            id: 'TSS_scatter'
-            title: 'TSS_Score_Scatter'
-            ylab: 'TSS score'
-            xlab: 'Number of TSS sites with > 20 Tn5 knick sites'
+  Nreads:
+    section_name: "Nreads"
+    description: "Number of reads per replicate. Mitochondrial fraction should be < 10%."
+  nrf_stats:
+    file_format: "tsv"
+    section_name: "NRF PBC Stats"
+    description: "
+      Non-redundant Fraction (NRF): Number of distinct uniquely mapping reads (i.e. after removing duplicates) / Total number of reads.
+      PCR Bottlenecking Coefficient 1 (PBC1): (number of genomic locations where exactly one read maps uniquely) / (number of distinct genomic locations to which some read maps uniquely).
+      PCR Bottlenecking Coefficient 2 (PBC2): (number of genomic locations where only one read maps uniquely) / (number of genomic locations where two reads map uniquely).
+      The preferred values are as follows: NRF>0.9, PBC1>0.9, and PBC2>3. "
+    plot_type: "table"
+    pconfig:
+      id: "NRF Stats"
+      title: "NRF Stats table"
+  fld_files:
+    file_format: "tsv"
+    section_name: "Fragment Length Distribution"
+    description: "Per sample FLD. Peak < 150bp = Nucleosome Free Peak, Peak between 150-300 bp = Mononucleosome Peak, Peak > 300bp = Dinucleosome Peak"
+    plot_type: "linegraph"
+    pconfig:
+      id: "FLD"
+      title: "Fragment Length Distribution"
+      ylab: "Normalized read density X 1e3"
+      xlab: "Fragment Length"
+      xmax: 1000
+  fld_stats_peaks:
+    file_format: "tsv"
+    section_name: "FLD Stats (Peaks)"
+    description: "Presence or Absence of nucleosome-free, mono and di-nucleosome peaks in FLD. A nucleosome free region (NFR) must be present. A mononucleosome peak must be present."
+    plot_type: "table"
+    pconfig:
+      id: "FLD Stats"
+      title: "FLD Stats table"
+  fld_stats_details:
+    file_format: "tsv"
+    section_name: "FLD Stats (Fractions and Ratios)"
+    description: "Fractions and Ratios of interest"
+    plot_type: "table"
+    pconfig:
+      id: "FLD Stats"  # NOTE(review): same id and title as the fld_stats_peaks pconfig — confirm the duplicate is intended
+      title: "FLD Stats table"
+  MACS2_Peak_Annotations:
+    section_name: "MACS2 Peaks"
+    description: "Peaks called using MACS2. For human or mouse data, Npeaks should be >150,000, though values >100,000 may be acceptable. "
+  Genrich_Peak_Annotations:
+    section_name: "Genrich Peaks"
+    description: "Peaks called using Genrich. For human or mouse data, Npeaks should be >150,000, though values >100,000 may be acceptable. "
+  peak_width_files:
+    file_format: "tsv"
+    section_name: "Peak width distribution"
+    description: "Peak width distribution of consensus peaks."
+    plot_type: "linegraph"
+    pconfig:
+      id: "PWD"
+      title: "Peak Width Distribution"
+      ylab: "Peak Density Percentage"
+      xlab: "Peak Width"
+      xmax: 20000
+  frip_stats:
+    file_format: "tsv"
+    section_name: "FRiP Stats"
+    description: "The fraction of reads in called peak regions (FRiP score) should be >0.3, though values greater than 0.2 are acceptable. Fraction of Reads in Peaks/DHS/Enhancers/Promoters are also reported. For human or mouse data, FRiPromoters is 12-20%"
+    plot_type: "table"
+    pconfig:
+      id: "FRiP Stats"
+      title: "FRiP Stats table"
+  tss_files:
+    file_format: "tsv"
+    section_name: "TSS distribution"
+    description: "Greenleaf Normalized TSS per sample distribution"
+    plot_type: "linegraph"
+    pconfig:
+      id: "TSS Enrichment"
+      title: "TSS Enrichment Distribution"
+      ylab: "Greenleaf Normalized TSS Enrichment"
+      xlab: "Distance from TSS"
+  tss_knicking_sites_files:
+    file_format: "tsv"
+    section_name: "TSS Score Scatter"
+    description: "TSS score to TSS with >20 Tn5 nicking sites scatter. "
+    plot_type: "scatter"
+    pconfig:
+      id: "TSS_scatter"
+      title: "TSS_Score_Scatter"
+      ylab: "TSS score"
+      xlab: "Number of TSS sites with > 20 Tn5 nick sites"
 
 sp:
-    nrf_stats:
-        fn: 'NRF_stats.tsv'
-    frip_stats:
-        fn: 'FRiP_stats.tsv'
-    fld_stats_peaks:
-        fn: 'FLD_stats_peaks.tsv'
-    fld_stats_details:
-        fn: 'FLD_stats_fractions_ratios.tsv'    
-    tss_knicking_sites_files:
-        fn: 'data.tss_nicking_sites.txt'
-    peak_width_files:
-        fn: '*.annotated.peak_width_density'
-    fld_files:
-        fn: '*.fld.txt'
-    tss_files:
-        fn: '*.tss.txt'
+  nrf_stats:
+    fn: "NRF_stats.tsv"
+  frip_stats:
+    fn: "FRiP_stats.tsv"
+  fld_stats_peaks:
+    fn: "FLD_stats_peaks.tsv"
+  fld_stats_details:
+    fn: "FLD_stats_fractions_ratios.tsv"
+  tss_knicking_sites_files:
+    fn: "data.tss_nicking_sites.txt"
+  peak_width_files:
+    fn: "*.annotated.peak_width_density"
+  fld_files:
+    fn: "*.fld.txt"
+  tss_files:
+    fn: "*.tss.txt"
 fn_clean_exts:
-     - '.genrich.narrowPeak.annotation_distribution'
-     - '.consensus.bed.annotated.peak_width_density'
-     - '.filt.bam'
-     - '.preseq'
-     - '.tss.txt'
-     - '.fld.txt'
-     - '.bowtie2.log'
-     - '.fastq.gz'
+  - ".genrich.narrowPeak.annotation_distribution"
+  - ".consensus.bed.annotated.peak_width_density"
+  - ".filt.bam"
+  - ".preseq"
+  - ".tss.txt"
+  - ".fld.txt"
+  - ".bowtie2.log"
+  - ".fastq.gz"