From f5cbf25cc173cd529d21dca0b91710ffcde31051 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 9 Mar 2026 20:00:20 +0000
Subject: [PATCH 1/2] [pre-commit.ci] pre-commit autoupdate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

updates:
- [github.com/pre-commit/pre-commit-hooks: v1.2.3 → v6.0.0](https://github.com/pre-commit/pre-commit-hooks/compare/v1.2.3...v6.0.0)
- [github.com/codespell-project/codespell: v2.2.4 → v2.4.2](https://github.com/codespell-project/codespell/compare/v2.2.4...v2.4.2)
- https://github.com/psf/black → https://github.com/psf/black-pre-commit-mirror
- [github.com/psf/black-pre-commit-mirror: 23.7.0 → 26.3.0](https://github.com/psf/black-pre-commit-mirror/compare/23.7.0...26.3.0)
- [github.com/lorenzwalthert/precommit: v0.1.2 → v0.4.3.9021](https://github.com/lorenzwalthert/precommit/compare/v0.1.2...v0.4.3.9021)
- [github.com/pre-commit/mirrors-prettier: v2.7.1 → v4.0.0-alpha.8](https://github.com/pre-commit/mirrors-prettier/compare/v2.7.1...v4.0.0-alpha.8)
- [github.com/compilerla/conventional-pre-commit: v2.3.0 → v4.4.0](https://github.com/compilerla/conventional-pre-commit/compare/v2.3.0...v4.4.0)
---
 .pre-commit-config.yaml | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 12dc373..76fb005 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -8,7 +8,7 @@ exclude: |
    )
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v1.2.3
+    rev: v6.0.0
     hooks:
       - id: check-added-large-files
       - id: end-of-file-fixer
@@ -16,28 +16,28 @@ repos:
       - id: check-json
   # spell check
   - repo: https://github.com/codespell-project/codespell
-    rev: v2.2.4
+    rev: v2.4.2
     hooks:
       - id: codespell
         # https://github.com/codespell-project/codespell/issues/1498
   # Python formatting
-  - repo: https://github.com/psf/black
-    rev: 23.7.0
+  - repo: https://github.com/psf/black-pre-commit-mirror
+    rev: 26.3.0
     hooks:
       - id: black
   # R formatting
   - repo: https://github.com/lorenzwalthert/precommit
-    rev: v0.1.2
+    rev: v0.4.3.9021
     hooks:
       - id: style-files
   # general linting
   - repo: https://github.com/pre-commit/mirrors-prettier
-    rev: v2.7.1
+    rev: v4.0.0-alpha.8
     hooks:
       - id: prettier
   # enforce commit format
   - repo: https://github.com/compilerla/conventional-pre-commit
-    rev: v2.3.0
+    rev: v4.4.0
     hooks:
       - id: conventional-pre-commit
         stages: [commit-msg]

From 684daf85cb4e1332932486dbfe20824b01f942c5 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 9 Mar 2026 20:08:27 +0000
Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .github/CONTRIBUTING.md                       |  26 +-
 docs/deployment.md                            |  26 +-
 resources/frip/README.md                      |  19 +-
 resources/tssBed/README.md                    |   4 +-
 workflow/scripts/DESeq2.Rmd                   | 252 ++++++-------
 workflow/scripts/DESeq2_runner.R              | 179 +++++-----
 workflow/scripts/aggregate_results.Rmd        | 163 +++++----
 workflow/scripts/aggregate_results_runner.R   | 114 +++---
 workflow/scripts/ccbr_annotate_bed.R          | 258 ++++++++------
 workflow/scripts/ccbr_annotate_peaks.R        | 328 +++++++++--------
 ...ixed_width_peakSets_to_consensus_peakSet.R | 336 ++++++++++--------
 .../scripts/narrowPeak_normalize_pvalues.R    |  98 ++---
 .../narrowPeak_to_fixed_width_peakSet.R       | 194 +++++-----
 workflow/scripts/pairwise.Rmd                 |  80 +++--
 14 files changed, 1115 insertions(+), 962 deletions(-)

diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index 58e0b95..a19998c 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -49,26 +49,26 @@ cd ASPEN
 
 - Install the python dependencies with pip
 
-  ```sh
-  pip install .
-  ```
+    ```sh
+    pip install .
+    ```
 
-  If you're developing on biowulf, you can use our shared conda environment which already has these dependencies installed
+    If you're developing on biowulf, you can use our shared conda environment which already has these dependencies installed
 
-  ```sh
-  . "/data/CCBR_Pipeliner/db/PipeDB/Conda/etc/profile.d/conda.sh"
-  conda activate py311
-  ```
+    ```sh
+    . "/data/CCBR_Pipeliner/db/PipeDB/Conda/etc/profile.d/conda.sh"
+    conda activate py311
+    ```
 
 - Install [`pre-commit`](https://pre-commit.com/#install) if you don't already
   have it. Then from the repo's root directory, run
 
-  ```sh
-  pre-commit install
-  ```
+    ```sh
+    pre-commit install
+    ```
 
-  This will install the repo's pre-commit hooks.
-  You'll only need to do this step the first time you clone the repo.
+    This will install the repo's pre-commit hooks.
+    You'll only need to do this step the first time you clone the repo.
 
 ### Create a branch
 
diff --git a/docs/deployment.md b/docs/deployment.md
index c9a604d..4cfae98 100644
--- a/docs/deployment.md
+++ b/docs/deployment.md
@@ -138,11 +138,11 @@ MACS2 parameters can be changed by editing this block in the `config.yaml`:
 
 ```yaml
 macs2:
-  extsize: 200
-  shiftsize: 100
-  p: 0.01
-  qfilter: 0.05
-  annotatePeaks: True
+    extsize: 200
+    shiftsize: 100
+    p: 0.01
+    qfilter: 0.05
+    annotatePeaks: True
 ```
 
 #### Genrich
@@ -151,14 +151,14 @@ Genrich paramaters can be changed by editing this block in the `config.yaml`:
 
 ```yaml
 genrich:
-  s: 5
-  m: 6
-  q: 1
-  l: 100
-  g: 100
-  d: 100
-  qfilter: 0.05
-  annotatePeaks: True
+    s: 5
+    m: 6
+    q: 1
+    l: 100
+    g: 100
+    d: 100
+    qfilter: 0.05
+    annotatePeaks: True
 ```
 
 #### Contrasts
diff --git a/resources/frip/README.md b/resources/frip/README.md
index e716d39..560a7ae 100644
--- a/resources/frip/README.md
+++ b/resources/frip/README.md
@@ -43,23 +43,20 @@ optional arguments:
 Sources:
 
 - HACER
-
-  - hg19 : http://bioinfo.vanderbilt.edu/AE/HACER/download/T1.txt
+    - hg19 : http://bioinfo.vanderbilt.edu/AE/HACER/download/T1.txt
 
 - FANTOM
-
-  - hg38 : http://fantom.gsc.riken.jp/5/datafiles/reprocessed/hg38_latest/extra/enhancer/F5.hg38.enhancers.bed.gz
-  - hg19 : http://fantom.gsc.riken.jp/5/datafiles/latest/extra/Enhancers/human_permissive_enhancers_phase_1_and_2.bed.gz
-  - mm9 : http://fantom.gsc.riken.jp/5/datafiles/latest/extra/Enhancers/mouse_permissive_enhancers_phase_1_and_2.bed.gz
-  - mm10 : http://fantom.gsc.riken.jp/5/datafiles/reprocessed/mm10_latest/extra/enhancer/F5.mm10.enhancers.bed.gz
+    - hg38 : http://fantom.gsc.riken.jp/5/datafiles/reprocessed/hg38_latest/extra/enhancer/F5.hg38.enhancers.bed.gz
+    - hg19 : http://fantom.gsc.riken.jp/5/datafiles/latest/extra/Enhancers/human_permissive_enhancers_phase_1_and_2.bed.gz
+    - mm9 : http://fantom.gsc.riken.jp/5/datafiles/latest/extra/Enhancers/mouse_permissive_enhancers_phase_1_and_2.bed.gz
+    - mm10 : http://fantom.gsc.riken.jp/5/datafiles/reprocessed/mm10_latest/extra/enhancer/F5.mm10.enhancers.bed.gz
 
 - DBSuper
-
-  - mm9 : http://asntech.org/dbsuper/data/bed/mm9/all_mm9_bed.bed
-  - hg19 : http://asntech.org/dbsuper/data/bed/hg19/all_hg19_bed.bed
+    - mm9 : http://asntech.org/dbsuper/data/bed/mm9/all_mm9_bed.bed
+    - hg19 : http://asntech.org/dbsuper/data/bed/hg19/all_hg19_bed.bed
 
 - EnhancerAtlas:
-  - fasta files were downloaded using the following commands for hg19 and mm9 data:
+    - fasta files were downloaded using the following commands for hg19 and mm9 data:
 
 ```
 % for i in 3T3-L1	416B	AtT-20	BAT	Bone_marrow	Brain_E14.5	Brown_preadipocyte_E18.5	C3H10Thalf	CD19+	CD4+CD8+	CD4+Treg	CD4+	CD8+	Cerebellum	Cerebellum_neonate	CH12	CMP	Cortex	Dendritic_cell	EpiLC	EpiSC	Erythroid_fetal_liver	Erythroid_spleen	ESC_Bruce4	ESC_J1	ESC_KH2	ESC_NPC	Forebrain_E11.5	Forebrain_E12.5	Forelimb_bud_embryo	Forelimb_E11	Forelimb_E13	G1E-ER4	G1E	GMP	Heart_E11.5	Heart_E12.5	Heart_E14.5	Heart	Hepatocyte	HFSC	Hindbrain_E11.5	Intestine	IPSC	Kidney	Large_intestine_epithelial	Lens_P1	Limb_E11.5	Limb_E14.5	Liver_E14.5	Liver	Lung_E14.5	Lung	Lung_neonate	MC3T3-E1	Megakaryocyte	MEL	Microglia	Midbrain_E11	Neuron_cortical	NIH-3T3	NKC_spleen	NKT	NPC	Olfactory_bulb	Pancreas	Pancreatic_islet	PDC_BM	PDC	Peritoneal_macrophage	Placenta	Pre-B	Pre-pro-B	Pro-B_BM	Prostate	Rib_chondrocyte_P1	Spermatid	Spleen	Stomach_neonate	Striatum	Testis	Th1	Th2	Thymus	Treg_cell	Uterus	V6.5	WAT	ZHBTc4;do
diff --git a/resources/tssBed/README.md b/resources/tssBed/README.md
index be1c796..1734c73 100644
--- a/resources/tssBed/README.md
+++ b/resources/tssBed/README.md
@@ -8,8 +8,8 @@
 - is strand aware
 - extracts chrom, start, geneid, geneName from the "gene" line for gene_type == "protein_coding" only
 - creates 400 bins around the TSS
-  - start if on + strand
-  - end if on - strand
+    - start if on + strand
+    - end if on - strand
 - each bin is 10 bp width so total -2k through +2k rgeion is covered in the 400 bins
 
 ```bash
diff --git a/workflow/scripts/DESeq2.Rmd b/workflow/scripts/DESeq2.Rmd
index 787e804..01d7070 100755
--- a/workflow/scripts/DESeq2.Rmd
+++ b/workflow/scripts/DESeq2.Rmd
@@ -54,40 +54,41 @@ suppressPackageStartupMessages(library("RColorBrewer"))
 suppressPackageStartupMessages(library("ComplexHeatmap"))
 options(ggrepel.max.overlaps = 100)
 # setup variables
-mount="~/data_RBL_NCI"
+mount <- "~/data_RBL_NCI"
 
 # read in sampleinfo
-coldata=params$coldata
-sampleinfo=read.csv(file=coldata,header=FALSE,sep="\t",comment.char = "#",strip.white = TRUE)
-colnames(sampleinfo) = c("replicateName","sampleName")
+coldata <- params$coldata
+sampleinfo <- read.csv(file = coldata, header = FALSE, sep = "\t", comment.char = "#", strip.white = TRUE)
+colnames(sampleinfo) <- c("replicateName", "sampleName")
 
 # read in counts matrix
-rawcountsmatrix=params$rawcountsmatrix
-countsdf=read.csv(file=rawcountsmatrix,header=TRUE,sep="\t",comment.char = "#",check.names = FALSE,strip.white = TRUE)
-original_countsdf = countsdf
+rawcountsmatrix <- params$rawcountsmatrix
+countsdf <- read.csv(file = rawcountsmatrix, header = TRUE, sep = "\t", comment.char = "#", check.names = FALSE, strip.white = TRUE)
+original_countsdf <- countsdf
 
 # excludecols=unlist(strsplit("Chr,Start,End,Strand,Length",","))
 # indexcols=unlist(strsplit("Geneid",","))
-excludecols=unlist(strsplit(params$excludecols,","))
-indexcols=unlist(strsplit(params$indexcols,","))
-countmatrix=countsdf %>% dplyr::select(-all_of(excludecols)) %>% column_to_rownames(var="Geneid")
-countmatrix=countmatrix[,sampleinfo$replicateName]
-sampleinfo$librarySize=colSums(countmatrix)
-
-roi=original_countsdf[,c("Chr","Start","End","Geneid")]
-colnames(roi)=c("chr","start","end","Geneid")
-roi=makeGRangesFromDataFrame(roi,keep.extra.columns=TRUE)
-nroi=nrow(as.data.frame(roi))
-
-contrast=paste0(params$contrast_numerator,"_vs_",params$contrast_denominator)
+excludecols <- unlist(strsplit(params$excludecols, ","))
+indexcols <- unlist(strsplit(params$indexcols, ","))
+countmatrix <- countsdf %>%
+  dplyr::select(-all_of(excludecols)) %>%
+  column_to_rownames(var = "Geneid")
+countmatrix <- countmatrix[, sampleinfo$replicateName]
+sampleinfo$librarySize <- colSums(countmatrix)
+
+roi <- original_countsdf[, c("Chr", "Start", "End", "Geneid")]
+colnames(roi) <- c("chr", "start", "end", "Geneid")
+roi <- makeGRangesFromDataFrame(roi, keep.extra.columns = TRUE)
+nroi <- nrow(as.data.frame(roi))
+
+contrast <- paste0(params$contrast_numerator, "_vs_", params$contrast_denominator)
 if (!(params$contrast_numerator %in% sampleinfo$sampleName)) {
-  stop(paste("ERROR:",params$contrast_numerator,"is not a samplename in sampleinfo file."))
+  stop(paste("ERROR:", params$contrast_numerator, "is not a samplename in sampleinfo file."))
 }
 if (!(params$contrast_denominator %in% sampleinfo$sampleName)) {
-  stop(paste("ERROR:",params$contrast_denominator,"is not a samplename in sampleinfo file."))
+  stop(paste("ERROR:", params$contrast_denominator, "is not a samplename in sampleinfo file."))
 }
-nsamples=nrow(sampleinfo)
-
+nsamples <- nrow(sampleinfo)
 ```
 
 
@@ -99,24 +100,28 @@ Total of `r nroi` ROIs
 x <- 0
 if (params$genome == "hg38") {
   txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
-  roiAnno <- annotatePeak(roi, tssRegion=c(-2000, 200),
-                            TxDb=txdb, annoDb="org.Hs.eg.db")
-  roiAnnodf=as.data.frame(roiAnno)
-  roiAnnodf$shortAnno=stringr::word(roiAnnodf$annotation,1)
-  table_anno=table(roiAnnodf$shortAnno)
+  roiAnno <- annotatePeak(roi,
+    tssRegion = c(-2000, 200),
+    TxDb = txdb, annoDb = "org.Hs.eg.db"
+  )
+  roiAnnodf <- as.data.frame(roiAnno)
+  roiAnnodf$shortAnno <- stringr::word(roiAnnodf$annotation, 1)
+  table_anno <- table(roiAnnodf$shortAnno)
   piecolor <- brewer.pal(length(table_anno), "Set2")
-  pieplot <- pie(table_anno,col=piecolor)
+  pieplot <- pie(table_anno, col = piecolor)
   x <- 1
 }
-if (params$genome == "mm10"){
+if (params$genome == "mm10") {
   txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
-  roiAnno <- annotatePeak(roi, tssRegion=c(-2000, 200),
-                            TxDb=txdb, annoDb="org.Mm.eg.db")
-  roiAnnodf=as.data.frame(roiAnno)
-  roiAnnodf$shortAnno=stringr::word(roiAnnodf$annotation,1)
-  table_anno=table(roiAnnodf$shortAnno)
+  roiAnno <- annotatePeak(roi,
+    tssRegion = c(-2000, 200),
+    TxDb = txdb, annoDb = "org.Mm.eg.db"
+  )
+  roiAnnodf <- as.data.frame(roiAnno)
+  roiAnnodf$shortAnno <- stringr::word(roiAnnodf$annotation, 1)
+  table_anno <- table(roiAnnodf$shortAnno)
   piecolor <- brewer.pal(length(table_anno), "Set2")
-  pieplot <- pie(table_anno,col=piecolor)
+  pieplot <- pie(table_anno, col = piecolor)
   x <- 2
 }
 ```
@@ -127,8 +132,7 @@ if (params$genome == "mm10"){
 Total of `r nsamples` samples.
 
 ```{r metadata,include=TRUE,echo=FALSE,cache=FALSE}
-pander(sampleinfo,style="rmarkdown")
-
+pander(sampleinfo, style = "rmarkdown")
 ```
 
 
@@ -136,73 +140,80 @@ pander(sampleinfo,style="rmarkdown")
 ## PCA
 
 ```{r allsamplepca,include=TRUE,echo=FALSE, results='hide',cache=FALSE,warning=FALSE,message=FALSE,error=FALSE}
-dds1 <- DESeqDataSetFromMatrix(countData = as.matrix(countmatrix),
-                                colData = sampleinfo[,c("replicateName","sampleName")],
-                                design = ~ 1)
+dds1 <- DESeqDataSetFromMatrix(
+  countData = as.matrix(countmatrix),
+  colData = sampleinfo[, c("replicateName", "sampleName")],
+  design = ~1
+)
 dds1 <- DESeq(dds1)
-if (nrow(dds1)<2000) {
-  rld1 <- varianceStabilizingTransformation(dds1, blind=TRUE)
+if (nrow(dds1) < 2000) {
+  rld1 <- varianceStabilizingTransformation(dds1, blind = TRUE)
 } else {
-  rld1 <- vst(dds1, blind=TRUE)
+  rld1 <- vst(dds1, blind = TRUE)
 }
-assayrld1 = as.data.frame(assay(rld1))
-assayrld1$row_variance = rowVars(as.matrix(assayrld1))
-assayrld1 = arrange(assayrld1,desc(row_variance))
-zero_variance_rows=assayrld1$row_variance<1e-5
-assayrld1$row_variance = NULL
-assayrld1 = assayrld1[!zero_variance_rows,]
-assayrld1_original=assayrld1
+assayrld1 <- as.data.frame(assay(rld1))
+assayrld1$row_variance <- rowVars(as.matrix(assayrld1))
+assayrld1 <- arrange(assayrld1, desc(row_variance))
+zero_variance_rows <- assayrld1$row_variance < 1e-5
+assayrld1$row_variance <- NULL
+assayrld1 <- assayrld1[!zero_variance_rows, ]
+assayrld1_original <- assayrld1
 if (nrow(assayrld1) > 50000) {
-  assayrld1 = assayrld1[1:50000,]
+  assayrld1 <- assayrld1[1:50000, ]
 }
-pca1=prcomp(t(assayrld1),scale. = T)
-m.pc1 = round(pca1$sdev[1]^2/sum(pca1$sdev^2)*100,2)
-m.pc2 = round(pca1$sdev[2]^2/sum(pca1$sdev^2)*100,2)
-m.pc3 = round(pca1$sdev[3]^2/sum(pca1$sdev^2)*100,2)
-xlab=paste0("PC1(",m.pc1,"%)")
-ylab=paste0("PC2(",m.pc2,"%)")
+pca1 <- prcomp(t(assayrld1), scale. = T)
+m.pc1 <- round(pca1$sdev[1]^2 / sum(pca1$sdev^2) * 100, 2)
+m.pc2 <- round(pca1$sdev[2]^2 / sum(pca1$sdev^2) * 100, 2)
+m.pc3 <- round(pca1$sdev[3]^2 / sum(pca1$sdev^2) * 100, 2)
+xlab <- paste0("PC1(", m.pc1, "%)")
+ylab <- paste0("PC2(", m.pc2, "%)")
 samples <- as.factor(sampleinfo$sampleName)
-p <- ggplot(pca1$x,aes(x=PC1,y=PC2,label=rownames(pca1$x)))+
-  geom_point(aes(col=samples,shape=samples))+scale_shape_manual(values=seq(0,15))+
-  geom_text_repel(max.overlaps = 15,size=2)+
-  xlab(xlab)+ylab(ylab)+
-  theme_light(base_size = 10)+
-  ggtitle("PCA (all samples)")+
+p <- ggplot(pca1$x, aes(x = PC1, y = PC2, label = rownames(pca1$x))) +
+  geom_point(aes(col = samples, shape = samples)) +
+  scale_shape_manual(values = seq(0, 15)) +
+  geom_text_repel(max.overlaps = 15, size = 2) +
+  xlab(xlab) +
+  ylab(ylab) +
+  theme_light(base_size = 10) +
+  ggtitle("PCA (all samples)") +
   theme_light()
 print(p)
 ```
 
 ```{r contrast,include=TRUE,echo=FALSE, results='hide',cache=FALSE,warning=FALSE,message=FALSE,error=FALSE}
-
-groups_in_contrasts=c(params$contrast_numerator,params$contrast_denominator)
+groups_in_contrasts <- c(params$contrast_numerator, params$contrast_denominator)
 sampleinfo %>% filter(sampleinfo$sampleName %in% groups_in_contrasts) -> sampleinfo2
-countmatrix2 <- countmatrix[,sampleinfo2$replicateName]
-colData = sampleinfo2[,c("replicateName","sampleName")]
-colData$sampleName <- relevel(as.factor(colData$sampleName),ref=params$contrast_denominator)
-dds2 <- DESeqDataSetFromMatrix(countData = as.matrix(countmatrix2),
-                                colData = colData,
-                                design = ~ sampleName)
+countmatrix2 <- countmatrix[, sampleinfo2$replicateName]
+colData <- sampleinfo2[, c("replicateName", "sampleName")]
+colData$sampleName <- relevel(as.factor(colData$sampleName), ref = params$contrast_denominator)
+dds2 <- DESeqDataSetFromMatrix(
+  countData = as.matrix(countmatrix2),
+  colData = colData,
+  design = ~sampleName
+)
 dds2 <- DESeq(dds2)
-if (nrow(dds2)<2000) {
-  rld2 <- varianceStabilizingTransformation(dds2, blind=TRUE)
+if (nrow(dds2) < 2000) {
+  rld2 <- varianceStabilizingTransformation(dds2, blind = TRUE)
 } else {
-  rld2 <- vst(dds2, blind=TRUE)
+  rld2 <- vst(dds2, blind = TRUE)
 }
 pca_data <- assay(rld2)
 pca_data <- pca_data[apply(pca_data, 1, var) > 1e-5, ]
-pca2=prcomp(t(pca_data),scale.=T)
-m2.pc1 = round(pca2$sdev[1]^2/sum(pca2$sdev^2)*100,2)
-m2.pc2 = round(pca2$sdev[2]^2/sum(pca2$sdev^2)*100,2)
-m2.pc3 = round(pca2$sdev[3]^2/sum(pca2$sdev^2)*100,2)
-xlab2=paste0("PC1(",m2.pc1,"%)")
-ylab2=paste0("PC2(",m2.pc2,"%)")
+pca2 <- prcomp(t(pca_data), scale. = T)
+m2.pc1 <- round(pca2$sdev[1]^2 / sum(pca2$sdev^2) * 100, 2)
+m2.pc2 <- round(pca2$sdev[2]^2 / sum(pca2$sdev^2) * 100, 2)
+m2.pc3 <- round(pca2$sdev[3]^2 / sum(pca2$sdev^2) * 100, 2)
+xlab2 <- paste0("PC1(", m2.pc1, "%)")
+ylab2 <- paste0("PC2(", m2.pc2, "%)")
 samples2 <- as.factor(sampleinfo2$sampleName)
-p <- ggplot(pca2$x,aes(x=PC1,y=PC2,label=rownames(pca2$x)))+
-  geom_point(aes(col=samples2,shape=samples2))+scale_shape_manual(values=seq(0,15))+
-  geom_text_repel(max.overlaps = 15,size=2)+
-  xlab(xlab2)+ylab(ylab2)+
-  theme_light(base_size = 10)+
-  ggtitle("PCA")+
+p <- ggplot(pca2$x, aes(x = PC1, y = PC2, label = rownames(pca2$x))) +
+  geom_point(aes(col = samples2, shape = samples2)) +
+  scale_shape_manual(values = seq(0, 15)) +
+  geom_text_repel(max.overlaps = 15, size = 2) +
+  xlab(xlab2) +
+  ylab(ylab2) +
+  theme_light(base_size = 10) +
+  ggtitle("PCA") +
   theme_light()
 print(p)
 ```
@@ -210,41 +221,42 @@ print(p)
 ## Results
 
 ```{r result,include=TRUE,echo=FALSE,warning=FALSE,message=FALSE,error=FALSE}
-res = DESeq2::results(dds2)
-resdf = as.data.frame(res)
-resdf$Geneid = rownames(resdf)
-resdf_w_anno = merge(resdf,roiAnnodf,by="Geneid")
-up_roi=resdf_w_anno[resdf_w_anno$log2FoldChange > params$FC_cutoff & resdf_w_anno$padj < params$FDR_cutoff,]
-down_roi=resdf_w_anno[resdf_w_anno$log2FoldChange < (-1 * params$FC_cutoff) & resdf_w_anno$padj < params$FDR_cutoff,]
-outfilename = paste0(contrast,".results.tsv")
-write.table(resdf_w_anno,file=params$diffatactsv,quote = FALSE,sep="\t",row.names = FALSE,col.names = TRUE)
-resdf_w_anno %>% dplyr::select(-c("baseMean","lfcSE","stat","width","strand","annotation","geneChr","geneStart","geneEnd","geneLength","geneStrand","transcriptId","geneId")) -> resdf_w_anno
-DT::datatable(resdf_w_anno,rownames = FALSE) %>% DT::formatRound(columns=c("log2FoldChange","pvalue","padj"),digits=3)
+res <- DESeq2::results(dds2)
+resdf <- as.data.frame(res)
+resdf$Geneid <- rownames(resdf)
+resdf_w_anno <- merge(resdf, roiAnnodf, by = "Geneid")
+up_roi <- resdf_w_anno[resdf_w_anno$log2FoldChange > params$FC_cutoff & resdf_w_anno$padj < params$FDR_cutoff, ]
+down_roi <- resdf_w_anno[resdf_w_anno$log2FoldChange < (-1 * params$FC_cutoff) & resdf_w_anno$padj < params$FDR_cutoff, ]
+outfilename <- paste0(contrast, ".results.tsv")
+write.table(resdf_w_anno, file = params$diffatactsv, quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE)
+resdf_w_anno %>% dplyr::select(-c("baseMean", "lfcSE", "stat", "width", "strand", "annotation", "geneChr", "geneStart", "geneEnd", "geneLength", "geneStrand", "transcriptId", "geneId")) -> resdf_w_anno
+DT::datatable(resdf_w_anno, rownames = FALSE) %>% DT::formatRound(columns = c("log2FoldChange", "pvalue", "padj"), digits = 3)
 ```
 
 ## Volcano
 
 ```{r volcano,include=TRUE,echo=FALSE, message=FALSE, results='hide',fig.width=6,fig.height=12,warning=FALSE,message=FALSE,error=FALSE}
-legendtxt = c("NS",paste0("log2FC > |",params$FC_cutoff,"|"),paste0("FDR < ",params$FDR_cutoff),"significant")
+legendtxt <- c("NS", paste0("log2FC > |", params$FC_cutoff, "|"), paste0("FDR < ", params$FDR_cutoff), "significant")
 EnhancedVolcano::EnhancedVolcano(resdf_w_anno,
-                lab=resdf_w_anno$SYMBOL,
-                x = "log2FoldChange",
-                y = "padj",
-                ylab = bquote(~-Log[10]~FDR),
-                FCcutoff = params$FC_cutoff,
-                xlim=c(min(resdf$log2FoldChange)-1,max(resdf$log2FoldChange)+1),
-                ylim= c(0, max(-log10(resdf$padj), na.rm=TRUE) + 1),
-                pCutoff = params$FDR_cutoff,
-                legendPosition = 'top',
-                pointSize = 0.8,
-                labSize = 2,
-                legendLabSize = 4,
-                legendIconSize = 1,
-                legendLabels = legendtxt,
-                title="",titleLabSize=0,
-                drawConnectors = TRUE,
-                subtitle = contrast,subtitleLabSize = 10,
-                caption = "",captionLabSize=0)
+  lab = resdf_w_anno$SYMBOL,
+  x = "log2FoldChange",
+  y = "padj",
+  ylab = bquote(~ -Log[10] ~ FDR),
+  FCcutoff = params$FC_cutoff,
+  xlim = c(min(resdf$log2FoldChange) - 1, max(resdf$log2FoldChange) + 1),
+  ylim = c(0, max(-log10(resdf$padj), na.rm = TRUE) + 1),
+  pCutoff = params$FDR_cutoff,
+  legendPosition = "top",
+  pointSize = 0.8,
+  labSize = 2,
+  legendLabSize = 4,
+  legendIconSize = 1,
+  legendLabels = legendtxt,
+  title = "", titleLabSize = 0,
+  drawConnectors = TRUE,
+  subtitle = contrast, subtitleLabSize = 10,
+  caption = "", captionLabSize = 0
+)
 ```
 
 
@@ -254,10 +266,12 @@ EnhancedVolcano::EnhancedVolcano(resdf_w_anno,
 ```{r updown,include=TRUE,echo=FALSE,cache=FALSE,warning=FALSE}
 # sometimes FC and FDR are too strict and up_roi/down_roi can be zero
 # to work with that scenario without errors:
-x=as.data.frame(rbind(table(up_roi$shortAnno),table(down_roi$shortAnno)))
+x <- as.data.frame(rbind(table(up_roi$shortAnno), table(down_roi$shortAnno)))
 if (nrow(x) == 2) {
-  rownames(x) <- c(paste("Open in", params$contrast_numerator),
-                   paste("Open in", params$contrast_denominator))
+  rownames(x) <- c(
+    paste("Open in", params$contrast_numerator),
+    paste("Open in", params$contrast_denominator)
+  )
   x$Total <- rowSums(x)
   DT::datatable(x, rownames = TRUE)
 } else if (nrow(x) > 0) {
diff --git a/workflow/scripts/DESeq2_runner.R b/workflow/scripts/DESeq2_runner.R
index d2d6000..572c67e 100644
--- a/workflow/scripts/DESeq2_runner.R
+++ b/workflow/scripts/DESeq2_runner.R
@@ -1,121 +1,136 @@
 #!/usr/bin/env Rscript
 suppressPackageStartupMessages(library("argparse"))
-#suppressPackageStartupMessages(library("here"))
-#scriptfolder=here()
+# suppressPackageStartupMessages(library("here"))
+# scriptfolder=here()
 
 
 # create parser object
-parser <- ArgumentParser(description="Wrapper for running DESeq2 with ATACseq counts matrix")
+parser <- ArgumentParser(description = "Wrapper for running DESeq2 with ATACseq counts matrix")
 
 
 parser$add_argument("-m", "--countsmatrix",
-                    type="character",
-                    help="path to countsmatrix",
-                    required=TRUE)
+  type = "character",
+  help = "path to countsmatrix",
+  required = TRUE
+)
 parser$add_argument("-g", "--genome",
-                    type="character",
-                    help="genome .. either hg38 or mm10",
-                    required=FALSE,
-                    default="hg38")
+  type = "character",
+  help = "genome .. either hg38 or mm10",
+  required = FALSE,
+  default = "hg38"
+)
 parser$add_argument("-c", "--coldata",
-                    type="character",
-                    help="coldata or sampleinto TSV file",
-                    required=TRUE)
+  type = "character",
+  help = "coldata or sampleinto TSV file",
+  required = TRUE
+)
 parser$add_argument("-n", "--contrastnumerator",
-                    type="character",
-                    help="Group1 of the contrasts",
-                    required=TRUE)
+  type = "character",
+  help = "Group1 of the contrasts",
+  required = TRUE
+)
 parser$add_argument("-d", "--contrastdenominator",
-                    type="character",
-                    help="Group2 of the contrasts",
-                    required=TRUE)
+  type = "character",
+  help = "Group2 of the contrasts",
+  required = TRUE
+)
 parser$add_argument("-f", "--foldchange",
-                    type="double",
-                    help="log2FC threshold",
-                    required=FALSE,
-                    default=2.0)
+  type = "double",
+  help = "log2FC threshold",
+  required = FALSE,
+  default = 2.0
+)
 parser$add_argument("-p", "--fdr",
-                    type="double",
-                    help="adj. p-value threshold",
-                    required=FALSE,
-                    default=0.05)
+  type = "double",
+  help = "adj. p-value threshold",
+  required = FALSE,
+  default = 0.05
+)
 parser$add_argument("-i", "--indexcols",
-                    type="character",
-                    help="comma-separated list of index columns",
-                    required=TRUE)
+  type = "character",
+  help = "comma-separated list of index columns",
+  required = TRUE
+)
 parser$add_argument("-e", "--excludecols",
-                    type="character",
-                    help="comma-separated list of columns to exclude",
-                    required=TRUE)
+  type = "character",
+  help = "comma-separated list of columns to exclude",
+  required = TRUE
+)
 parser$add_argument("-o", "--outdir",
-                    type="character",
-                    help="output dir",
-                    required=TRUE)
+  type = "character",
+  help = "output dir",
+  required = TRUE
+)
 parser$add_argument("-t", "--tmpdir",
-                    type="character",
-                    help="temp dir",
-                    required=FALSE,
-		    default="/tmp")
+  type = "character",
+  help = "temp dir",
+  required = FALSE,
+  default = "/tmp"
+)
 parser$add_argument("-s", "--scriptsdir",
-                    type="character",
-                    help="scripts dir",
-                    required=TRUE)
+  type = "character",
+  help = "scripts dir",
+  required = TRUE
+)
 
 args <- parser$parse_args()
 
 # check inputs
-for ( f in c(args$countmatrix,args$coldata)) {
-  if (!file.exists(f)){
-    outstr=paste("ERROR: File: ",f,"Does not exists!")
+for (f in c(args$countmatrix, args$coldata)) {
+  if (!file.exists(f)) {
+    outstr <- paste("ERROR: File: ", f, "Does not exists!")
     message(outstr)
     quit(status = 1)
   }
-  if (file.access(f,mode=4)!=0){
-    outstr=paste("ERROR: File: ",f,"is not readable!")
+  if (file.access(f, mode = 4) != 0) {
+    outstr <- paste("ERROR: File: ", f, "is not readable!")
     message(outstr)
     quit(status = 1)
   }
 }
-if (!dir.exists(args$outdir)){
-    tryCatch(
-	     expr = {
-		     dir.create(args$outdir)
-	     },
-	     error = {
-		     outstr=paste("ERROR: Dir: ", args$outdir, " Cannot be created!")
-		     message(outstr)
-		     quit(status=1)
-	     }
-	)
+if (!dir.exists(args$outdir)) {
+  tryCatch(
+    expr = {
+      dir.create(args$outdir)
+    },
+    error = {
+      outstr <- paste("ERROR: Dir: ", args$outdir, " Cannot be created!")
+      message(outstr)
+      quit(status = 1)
+    }
+  )
 }
 
-sampleinfo=read.csv(file=args$coldata,header=FALSE,sep="\t",comment.char = "#",strip.white = TRUE)
-colnames(sampleinfo) = c("replicateName","sampleName")
-freq_table = as.data.frame(table(sampleinfo$sampleName))
-for ( condition in c(args$contrastnumerator,args$contrastdenominator) ) {
-  if (! condition %in% freq_table$Var1) {
-    outstr=paste("ERROR: Condition: ",condition,"absent in ",args$coldata)
+sampleinfo <- read.csv(file = args$coldata, header = FALSE, sep = "\t", comment.char = "#", strip.white = TRUE)
+colnames(sampleinfo) <- c("replicateName", "sampleName")
+freq_table <- as.data.frame(table(sampleinfo$sampleName))
+for (condition in c(args$contrastnumerator, args$contrastdenominator)) {
+  if (!condition %in% freq_table$Var1) {
+    outstr <- paste("ERROR: Condition: ", condition, "absent in ", args$coldata)
     message(outstr)
     quit(status = 1)
   }
 }
 
-outprefix 	=	paste0(args$contrastnumerator,"_vs_",args$contrastdenominator)
-outhtml 	=	paste0(outprefix,".html")
-outtsv	 	=	paste0(outprefix,".tsv")
+outprefix <- paste0(args$contrastnumerator, "_vs_", args$contrastdenominator)
+outhtml <- paste0(outprefix, ".html")
+outtsv <- paste0(outprefix, ".tsv")
 
-myparams <- list(rawcountsmatrix 	= args$countsmatrix,
-                 genome			= args$genome,
-                 coldata 		= args$coldata,
-                 contrast_numerator 	= args$contrastnumerator,
-                 contrast_denominator 	= args$contrastdenominator,
-                 FC_cutoff 		= args$foldchange,
-                 FDR_cutoff 		= args$fdr,
-                 indexcols 		= args$indexcols,
-                 excludecols 		= args$excludecols,
-                 diffatactsv 		= paste(args$outdir,outtsv,sep="/"))
+myparams <- list(
+  rawcountsmatrix = args$countsmatrix,
+  genome = args$genome,
+  coldata = args$coldata,
+  contrast_numerator = args$contrastnumerator,
+  contrast_denominator = args$contrastdenominator,
+  FC_cutoff = args$foldchange,
+  FDR_cutoff = args$fdr,
+  indexcols = args$indexcols,
+  excludecols = args$excludecols,
+  diffatactsv = paste(args$outdir, outtsv, sep = "/")
+)
 
-rmarkdown::render(paste(args$scriptsdir,'DESeq2.Rmd',sep="/"),
-          params		=	myparams,
-	  output_file		=	paste(args$outdir,outhtml,sep="/"),
-	  intermediates_dir	=	args$tmpdir)
+rmarkdown::render(paste(args$scriptsdir, "DESeq2.Rmd", sep = "/"),
+  params = myparams,
+  output_file = paste(args$outdir, outhtml, sep = "/"),
+  intermediates_dir = args$tmpdir
+)
diff --git a/workflow/scripts/aggregate_results.Rmd b/workflow/scripts/aggregate_results.Rmd
index 978a119..4ac7b8d 100644
--- a/workflow/scripts/aggregate_results.Rmd
+++ b/workflow/scripts/aggregate_results.Rmd
@@ -35,22 +35,24 @@ suppressPackageStartupMessages(library("RColorBrewer"))
 suppressPackageStartupMessages(library("ComplexHeatmap"))
 
 # read in sampleinfo
-coldata=params$coldata
-sampleinfo=read.csv(file=coldata,header=FALSE,sep="\t",comment.char = "#",strip.white = TRUE)
-colnames(sampleinfo) = c("replicateName","sampleName")
+coldata <- params$coldata
+sampleinfo <- read.csv(file = coldata, header = FALSE, sep = "\t", comment.char = "#", strip.white = TRUE)
+colnames(sampleinfo) <- c("replicateName", "sampleName")
 
 # read in counts matrix
-rawcountsmatrix=params$rawcountsmatrix
-countsdf=read.csv(file=rawcountsmatrix,header=TRUE,sep="\t",comment.char = "#",check.names = FALSE,strip.white = TRUE)
-original_countsdf = countsdf
-nroi=nrow(countsdf)
-
-excludecols=unlist(strsplit(params$excludecols,","))
-indexcols=unlist(strsplit(params$indexcols,","))
-countmatrix=countsdf %>% dplyr::select(-all_of(excludecols)) %>% column_to_rownames(var="Geneid")
-countmatrix=countmatrix[,sampleinfo$replicateName]
-sampleinfo$librarySize=colSums(countmatrix)
-nsamples=nrow(sampleinfo)
+rawcountsmatrix <- params$rawcountsmatrix
+countsdf <- read.csv(file = rawcountsmatrix, header = TRUE, sep = "\t", comment.char = "#", check.names = FALSE, strip.white = TRUE)
+original_countsdf <- countsdf
+nroi <- nrow(countsdf)
+
+excludecols <- unlist(strsplit(params$excludecols, ","))
+indexcols <- unlist(strsplit(params$indexcols, ","))
+countmatrix <- countsdf %>%
+  dplyr::select(-all_of(excludecols)) %>%
+  column_to_rownames(var = "Geneid")
+countmatrix <- countmatrix[, sampleinfo$replicateName]
+sampleinfo$librarySize <- colSums(countmatrix)
+nsamples <- nrow(sampleinfo)
 ```
 
 ## ROIs
@@ -62,97 +64,104 @@ Total of `r nroi` ROIs
 Total of `r nsamples` replicates.
 
 ```{r metadata,include=TRUE,echo=FALSE,cache=FALSE}
-pander(sampleinfo,style="rmarkdown")
-
+pander(sampleinfo, style = "rmarkdown")
 ```
 
 ## PCA
 
 ```{r allsamplepca,include=TRUE,echo=FALSE, results='hide',cache=FALSE,warning=FALSE,message=FALSE,error=FALSE}
-dds1 <- DESeqDataSetFromMatrix(countData = as.matrix(countmatrix),
-                                colData = sampleinfo[,c("replicateName","sampleName")],
-                                design = ~ 1)
+dds1 <- DESeqDataSetFromMatrix(
+  countData = as.matrix(countmatrix),
+  colData = sampleinfo[, c("replicateName", "sampleName")],
+  design = ~1
+)
 dds1 <- DESeq(dds1)
-if (nrow(dds1)<2000) {
-  rld1 <- varianceStabilizingTransformation(dds1,blind=TRUE)
+if (nrow(dds1) < 2000) {
+  rld1 <- varianceStabilizingTransformation(dds1, blind = TRUE)
 } else {
-  rld1 <- vst(dds1, blind=TRUE)
+  rld1 <- vst(dds1, blind = TRUE)
 }
-assayrld1 = as.data.frame(assay(rld1))
-assayrld1$row_variance = rowVars(as.matrix(assayrld1))
-assayrld1 = arrange(assayrld1,desc(row_variance))
-zero_variance_rows=assayrld1$row_variance<1e-5
-assayrld1$row_variance = NULL
-assayrld1 = assayrld1[!zero_variance_rows,]
-assayrld1_original=assayrld1
+assayrld1 <- as.data.frame(assay(rld1))
+assayrld1$row_variance <- rowVars(as.matrix(assayrld1))
+assayrld1 <- arrange(assayrld1, desc(row_variance))
+zero_variance_rows <- assayrld1$row_variance < 1e-5
+assayrld1$row_variance <- NULL
+assayrld1 <- assayrld1[!zero_variance_rows, ]
+assayrld1_original <- assayrld1
 if (nrow(assayrld1) > 50000) {
-  assayrld1 = assayrld1[1:50000,]
+  assayrld1 <- assayrld1[1:50000, ]
 }
-pca1=prcomp(t(assayrld1),scale. = T)
-m.pc1 = round(pca1$sdev[1]^2/sum(pca1$sdev^2)*100,2)
-m.pc2 = round(pca1$sdev[2]^2/sum(pca1$sdev^2)*100,2)
-m.pc3 = round(pca1$sdev[3]^2/sum(pca1$sdev^2)*100,2)
-xlab=paste0("PC1(",m.pc1,"%)")
-ylab=paste0("PC2(",m.pc2,"%)")
+pca1 <- prcomp(t(assayrld1), scale. = T)
+m.pc1 <- round(pca1$sdev[1]^2 / sum(pca1$sdev^2) * 100, 2)
+m.pc2 <- round(pca1$sdev[2]^2 / sum(pca1$sdev^2) * 100, 2)
+m.pc3 <- round(pca1$sdev[3]^2 / sum(pca1$sdev^2) * 100, 2)
+xlab <- paste0("PC1(", m.pc1, "%)")
+ylab <- paste0("PC2(", m.pc2, "%)")
 samples <- as.factor(sampleinfo$sampleName)
-p <- ggplot(pca1$x,aes(x=PC1,y=PC2,label=rownames(pca1$x)))+
-  geom_point(aes(col=samples,shape=samples))+scale_shape_manual(values=seq(0,15))+
-  geom_text_repel(max.overlaps = 15,size=2)+
-  xlab(xlab)+ylab(ylab)+
-  theme_light(base_size = 10)+
-  ggtitle("PCA (all samples)")+
+p <- ggplot(pca1$x, aes(x = PC1, y = PC2, label = rownames(pca1$x))) +
+  geom_point(aes(col = samples, shape = samples)) +
+  scale_shape_manual(values = seq(0, 15)) +
+  geom_text_repel(max.overlaps = 15, size = 2) +
+  xlab(xlab) +
+  ylab(ylab) +
+  theme_light(base_size = 10) +
+  ggtitle("PCA (all samples)") +
   theme_light()
 print(p)
 ```
 
 ```{r degs,include=TRUE,echo=FALSE, results='hide',cache=FALSE,warning=FALSE,message=FALSE,error=FALSE}
-allfiles = list.files(path=params$diffatacdir,pattern="_vs_")
-degfiles = allfiles[grepl(".tsv",allfiles)]
-contrasts = gsub(".tsv","",degfiles)
+allfiles <- list.files(path = params$diffatacdir, pattern = "_vs_")
+degfiles <- allfiles[grepl(".tsv", allfiles)]
+contrasts <- gsub(".tsv", "", degfiles)
 # deg_file=paste(params$diffatacdir,"DGCR8_KO_vs_Wt.tsv",sep="/")
-add_prefix <- function(string,prefix) {
-  return (paste(prefix,string,sep="_"))
+add_prefix <- function(string, prefix) {
+  return(paste(prefix, string, sep = "_"))
 }
-get_sig_geneids <- function(df,fc=2,fdr=0.05) {
-  df %>% filter(log2FoldChange > fc) %>% filter(padj < fdr) -> df_up
-  df %>% filter(log2FoldChange < (-1 * fc)) %>% filter(padj < fdr) -> df_down
-  return (c(df_up$Geneid,df_down$Geneid))
+get_sig_geneids <- function(df, fc = 2, fdr = 0.05) {
+  df %>%
+    filter(log2FoldChange > fc) %>%
+    filter(padj < fdr) -> df_up
+  df %>%
+    filter(log2FoldChange < (-1 * fc)) %>%
+    filter(padj < fdr) -> df_down
+  return(c(df_up$Geneid, df_down$Geneid))
 }
 read_deg_file <- function(deg_file) {
-  df = read.csv(file=deg_file,header=TRUE,sep="\t",comment.char = "#",strip.white = TRUE)
-  df %>% select(c("Geneid","log2FoldChange","pvalue","padj")) -> df
-  return (df)
+  df <- read.csv(file = deg_file, header = TRUE, sep = "\t", comment.char = "#", strip.white = TRUE)
+  df %>% select(c("Geneid", "log2FoldChange", "pvalue", "padj")) -> df
+  return(df)
 }
 get_anno_from_deg_file <- function(deg_file) {
-  df = read.csv(file=deg_file,header=TRUE,sep="\t",comment.char = "#",strip.white = TRUE)
-  df %>% select(-c("baseMean","lfcSE","stat","width","strand","log2FoldChange","pvalue","padj")) -> df
-  return (df)
+  df <- read.csv(file = deg_file, header = TRUE, sep = "\t", comment.char = "#", strip.white = TRUE)
+  df %>% select(-c("baseMean", "lfcSE", "stat", "width", "strand", "log2FoldChange", "pvalue", "padj")) -> df
+  return(df)
 }
-rename_columns <- function(df,contrast) {
-  colnames(df) <- c("Geneid",add_prefix(string="log2FoldChange",prefix=contrast),add_prefix(string="pvalue",prefix=contrast),add_prefix(string="padj",prefix=contrast))
+rename_columns <- function(df, contrast) {
+  colnames(df) <- c("Geneid", add_prefix(string = "log2FoldChange", prefix = contrast), add_prefix(string = "pvalue", prefix = contrast), add_prefix(string = "padj", prefix = contrast))
   return(df)
 }
-all_sig_geneids = c()
-dfs = list()
-collist = c()
-for (i in 1:length(degfiles)){
+all_sig_geneids <- c()
+dfs <- list()
+collist <- c()
+for (i in 1:length(degfiles)) {
   # i=1
-  deg_file=paste(params$diffatacdir,degfiles[i],sep="/")
+  deg_file <- paste(params$diffatacdir, degfiles[i], sep = "/")
   print(deg_file)
-  contrast=contrasts[i]
+  contrast <- contrasts[i]
   x <- read_deg_file(deg_file)
-  all_sig_geneids <- c(all_sig_geneids,get_sig_geneids(x,params$FC_cutoff,params$FDR_cutoff))
-  dfs[[contrast]] <- rename_columns(x,contrast)
-  for (j in c("log2FoldChange","pvalue","padj")){
-    k=add_prefix(j,contrast)
-    collist = c(collist,k)
+  all_sig_geneids <- c(all_sig_geneids, get_sig_geneids(x, params$FC_cutoff, params$FDR_cutoff))
+  dfs[[contrast]] <- rename_columns(x, contrast)
+  for (j in c("log2FoldChange", "pvalue", "padj")) {
+    k <- add_prefix(j, contrast)
+    collist <- c(collist, k)
   }
 }
 all_sig_geneids <- unique(all_sig_geneids)
-dfs[["anno"]] <- get_anno_from_deg_file(deg_file=paste(params$diffatacdir,degfiles[1],sep="/"))
-dfs %>% purrr::reduce(inner_join, by="Geneid") -> merged_dfs
-write.table(merged_dfs,file=params$outtsv,quote = FALSE,sep="\t",row.names = FALSE,col.names = TRUE)
-DT::datatable(merged_dfs,rownames = FALSE) %>% DT::formatRound(columns=collist,digits=3)
+dfs[["anno"]] <- get_anno_from_deg_file(deg_file = paste(params$diffatacdir, degfiles[1], sep = "/"))
+dfs %>% purrr::reduce(inner_join, by = "Geneid") -> merged_dfs
+write.table(merged_dfs, file = params$outtsv, quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE)
+DT::datatable(merged_dfs, rownames = FALSE) %>% DT::formatRound(columns = collist, digits = 3)
 ```
 
 ## Heatmap
@@ -160,7 +169,7 @@ DT::datatable(merged_dfs,rownames = FALSE) %>% DT::formatRound(columns=collist,d
 Considering all significantly UP or DOWN ROIs aggregated across all contrasts. (Total `r length(all_sig_geneids)` ROIs)
 
 ```{r heatmap,include=TRUE,echo=FALSE, message=FALSE, results='hide',fig.width=4,fig.height=10,warning=FALSE,message=FALSE,error=FALSE}
-heatmap_matrix=assay(rld1)
-heatmap_matrix=heatmap_matrix[all_sig_geneids,]
-Heatmap(matrix = t(scale(t(heatmap_matrix),scale = TRUE)),show_row_names = FALSE,name=" ")
+heatmap_matrix <- assay(rld1)
+heatmap_matrix <- heatmap_matrix[all_sig_geneids, ]
+Heatmap(matrix = t(scale(t(heatmap_matrix), scale = TRUE)), show_row_names = FALSE, name = " ")
 ```
diff --git a/workflow/scripts/aggregate_results_runner.R b/workflow/scripts/aggregate_results_runner.R
index bde78ed..11a706f 100644
--- a/workflow/scripts/aggregate_results_runner.R
+++ b/workflow/scripts/aggregate_results_runner.R
@@ -3,60 +3,69 @@ suppressPackageStartupMessages(library("argparse"))
 
 
 # create parser object
-parser <- ArgumentParser(description="Wrapper for running aggregate_results.Rmd")
+parser <- ArgumentParser(description = "Wrapper for running aggregate_results.Rmd")
 
 
 parser$add_argument("-m", "--countsmatrix",
-                    type="character",
-                    help="path to countsmatrix",
-                    required=TRUE)
+  type = "character",
+  help = "path to countsmatrix",
+  required = TRUE
+)
 parser$add_argument("-a", "--diffatacdir",
-                    type="character",
-                    help="diff atac dir",
-                    required=TRUE)
+  type = "character",
+  help = "diff atac dir",
+  required = TRUE
+)
 parser$add_argument("-c", "--coldata",
-                    type="character",
-                    help="coldata or sampleinto TSV file",
-                    required=TRUE)
+  type = "character",
+  help = "coldata or sampleinto TSV file",
+  required = TRUE
+)
 parser$add_argument("-f", "--foldchange",
-                    type="double",
-                    help="log2FC threshold",
-                    required=FALSE,
-                    default=2.0)
+  type = "double",
+  help = "log2FC threshold",
+  required = FALSE,
+  default = 2.0
+)
 parser$add_argument("-p", "--fdr",
-                    type="double",
-                    help="adj. p-value threshold",
-                    required=FALSE,
-                    default=0.05)
+  type = "double",
+  help = "adj. p-value threshold",
+  required = FALSE,
+  default = 0.05
+)
 parser$add_argument("-i", "--indexcols",
-                    type="character",
-                    help="comma-separated list of index columns",
-                    required=TRUE)
+  type = "character",
+  help = "comma-separated list of index columns",
+  required = TRUE
+)
 parser$add_argument("-e", "--excludecols",
-                    type="character",
-                    help="comma-separated list of columns to exclude",
-                    required=TRUE)
+  type = "character",
+  help = "comma-separated list of columns to exclude",
+  required = TRUE
+)
 parser$add_argument("-t", "--tmpdir",
-                    type="character",
-                    help="temp dir",
-                    required=FALSE,
-		                default="/tmp")
+  type = "character",
+  help = "temp dir",
+  required = FALSE,
+  default = "/tmp"
+)
 parser$add_argument("-s", "--scriptsdir",
-                    type="character",
-                    help="scripts dir",
-                    required=TRUE)
+  type = "character",
+  help = "scripts dir",
+  required = TRUE
+)
 
 args <- parser$parse_args()
 
 # check inputs
-for ( f in c(args$countmatrix,args$coldata)) {
-  if (!file.exists(f)){
-    outstr=paste("ERROR: File: ",f,"Does not exists!")
+for (f in c(args$countmatrix, args$coldata)) {
+  if (!file.exists(f)) {
+    outstr <- paste("ERROR: File: ", f, "Does not exists!")
     message(outstr)
     quit(status = 1)
   }
-  if (file.access(f,mode=4)!=0){
-    outstr=paste("ERROR: File: ",f,"is not readable!")
+  if (file.access(f, mode = 4) != 0) {
+    outstr <- paste("ERROR: File: ", f, "is not readable!")
     message(outstr)
     quit(status = 1)
   }
@@ -64,20 +73,23 @@ for ( f in c(args$countmatrix,args$coldata)) {
 setwd(args$diffatacdir)
 
 
-outprefix =	"all_diff_atacs"
-outhtml 	=	paste0(args$diffatacdir,"/",outprefix,".html")
-outtsv	 	=	paste0(args$diffatacdir,"/",outprefix,".tsv")
+outprefix <- "all_diff_atacs"
+outhtml <- paste0(args$diffatacdir, "/", outprefix, ".html")
+outtsv <- paste0(args$diffatacdir, "/", outprefix, ".tsv")
 
-myparams <- list(rawcountsmatrix 	= args$countsmatrix,
-                 coldata 		  = args$coldata,
-                 FC_cutoff 		= args$foldchange,
-                 FDR_cutoff 	= args$fdr,
-                 diffatacdir  = args$diffatacdir,
-                 indexcols 		= args$indexcols,
-                 excludecols 	= args$excludecols,
-                 outtsv       = outtsv)
+myparams <- list(
+  rawcountsmatrix = args$countsmatrix,
+  coldata = args$coldata,
+  FC_cutoff = args$foldchange,
+  FDR_cutoff = args$fdr,
+  diffatacdir = args$diffatacdir,
+  indexcols = args$indexcols,
+  excludecols = args$excludecols,
+  outtsv = outtsv
+)
 
-rmarkdown::render(paste(args$scriptsdir,'aggregate_results.Rmd',sep="/"),
-          params	    =	myparams,
-	        output_file	=	outhtml,
-	  intermediates_dir	=	args$tmpdir)
+rmarkdown::render(paste(args$scriptsdir, "aggregate_results.Rmd", sep = "/"),
+  params = myparams,
+  output_file = outhtml,
+  intermediates_dir = args$tmpdir
+)
diff --git a/workflow/scripts/ccbr_annotate_bed.R b/workflow/scripts/ccbr_annotate_bed.R
index 9c51755..b885de6 100644
--- a/workflow/scripts/ccbr_annotate_bed.R
+++ b/workflow/scripts/ccbr_annotate_bed.R
@@ -1,4 +1,3 @@
-
 suppressPackageStartupMessages(library("argparse"))
 suppressPackageStartupMessages(library("dplyr"))
 suppressPackageStartupMessages(library("ChIPseeker"))
@@ -21,149 +20,178 @@ parser <- ArgumentParser()
 # specify our desired options
 # by default ArgumentParser will add an help option
 
-parser$add_argument("-b", "--bed", required=TRUE,
-                    dest="bed", help="narrowpeak file")
-parser$add_argument("-a", "--annotated", required=TRUE, dest="annotated",
-                    help="annotated output file")
-parser$add_argument("-u", "--uptss", required=FALSE, type="integer", default=2000, help="upstream bases from TSS")
-parser$add_argument("-d", "--downtss", required=FALSE, type="integer", default=2000, help="upstream bases from TSS")
-parser$add_argument("-t", "--toppromoterpeaks", required=FALSE, type="integer", default=1000, help="filter top N peaks in promoters for genelist output")
-parser$add_argument("-l", "--genelist", required=TRUE, help="list of genes with peaks in promoter regions")
-parser$add_argument("-f", "--atypefreq", required=TRUE, help="frequency of different annotation types")
-parser$add_argument("-g", "--genome", required=TRUE, dest="genome",
-                    help="hg38/hg19/mm10/mm9/mmul10/bosTau9")
+parser$add_argument("-b", "--bed",
+  required = TRUE,
+  dest = "bed", help = "narrowpeak file"
+)
+parser$add_argument("-a", "--annotated",
+  required = TRUE, dest = "annotated",
+  help = "annotated output file"
+)
+parser$add_argument("-u", "--uptss", required = FALSE, type = "integer", default = 2000, help = "upstream bases from TSS")
+parser$add_argument("-d", "--downtss", required = FALSE, type = "integer", default = 2000, help = "upstream bases from TSS")
+parser$add_argument("-t", "--toppromoterpeaks", required = FALSE, type = "integer", default = 1000, help = "filter top N peaks in promoters for genelist output")
+parser$add_argument("-l", "--genelist", required = TRUE, help = "list of genes with peaks in promoter regions")
+parser$add_argument("-f", "--atypefreq", required = TRUE, help = "frequency of different annotation types")
+parser$add_argument("-g", "--genome",
+  required = TRUE, dest = "genome",
+  help = "hg38/hg19/mm10/mm9/mmul10/bosTau9"
+)
 
 # get command line options, if help option encountered print help and exit,
 # otherwise if options not found on command line then set defaults,
 args <- parser$parse_args()
 
-if (args$genome=="mm9" | args$genome=="mm10"){
-  adb="org.Mm.eg.db"
+if (args$genome == "mm9" | args$genome == "mm10") {
+  adb <- "org.Mm.eg.db"
 }
-if (args$genome=="hg19" | args$genome=="hg38"){
-  adb="org.Hs.eg.db"
+if (args$genome == "hg19" | args$genome == "hg38") {
+  adb <- "org.Hs.eg.db"
 }
 
-if (args$genome=="mmul10"){
-  adb="org.Mmu.eg.db"
+if (args$genome == "mmul10") {
+  adb <- "org.Mmu.eg.db"
 }
 
-if (args$genome=="bosTau9"){
-  adb="org.Bt.eg.db"
+if (args$genome == "bosTau9") {
+  adb <- "org.Bt.eg.db"
 }
 
-if (args$genome=="hg19") {tdb=TxDb.Hsapiens.UCSC.hg19.knownGene}
-if (args$genome=="hg38") {tdb=TxDb.Hsapiens.UCSC.hg38.knownGene}
-if (args$genome=="mm9") {tdb=TxDb.Mmusculus.UCSC.mm9.knownGene}
-if (args$genome=="mm10") {tdb=TxDb.Mmusculus.UCSC.mm10.knownGene}
-if (args$genome=="mmul10") {tdb=TxDb.Mmulatta.UCSC.rheMac10.refGene}
-if (args$genome=="bosTau9") {tdb=TxDb.Btaurus.UCSC.bosTau9.refGene}
+if (args$genome == "hg19") {
+  tdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
+}
+if (args$genome == "hg38") {
+  tdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
+}
+if (args$genome == "mm9") {
+  tdb <- TxDb.Mmusculus.UCSC.mm9.knownGene
+}
+if (args$genome == "mm10") {
+  tdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
+}
+if (args$genome == "mmul10") {
+  tdb <- TxDb.Mmulatta.UCSC.rheMac10.refGene
+}
+if (args$genome == "bosTau9") {
+  tdb <- TxDb.Btaurus.UCSC.bosTau9.refGene
+}
 
 
-np=read.table(args$bed,sep="\t")
-np=np[,seq(1,3)]
-colnames(np)=c("chrom",
-               "chromStart",
-               "chromEnd")
+np <- read.table(args$bed, sep = "\t")
+np <- np[, seq(1, 3)]
+colnames(np) <- c(
+  "chrom",
+  "chromStart",
+  "chromEnd"
+)
 
-np$peakID=paste(np$chrom,":",np$chromStart,"-",np$chromEnd,sep="")
+np$peakID <- paste(np$chrom, ":", np$chromStart, "-", np$chromEnd, sep = "")
 
-peaks=GRanges(seqnames=np$chrom,ranges=IRanges(np$chromStart,np$chromEnd))
+peaks <- GRanges(seqnames = np$chrom, ranges = IRanges(np$chromStart, np$chromEnd))
 
 # using annotatePeak from ChIPseeker
-pa=annotatePeak(peak = peaks,
-                tssRegion = c(-2000,2000),
-                TxDb = tdb,
-                level = "transcript",
-                genomicAnnotationPriority = c("Promoter", "5UTR", "3UTR", "Exon", "Intron", "Downstream", "Intergenic"),
-                annoDb = adb,
-                sameStrand = FALSE,
-                ignoreOverlap = FALSE,
-                ignoreUpstream = FALSE,
-                ignoreDownstream = FALSE,
-                overlap = "TSS")
-
-padf=as.data.frame(pa)
-padf$peakID=paste(padf$seqnames,":",padf$start,"-",padf$end,sep="")
-merged=merge(padf,np,by="peakID")
-merged=merged[,
-              c("peakID",
-                "chrom",
-                "chromStart",
-                "chromEnd",
-                "width",
-                "annotation",
-                "geneChr",
-                "geneStart",
-                "geneEnd",
-                "geneLength",
-                "geneStrand",
-                "geneId",
-                "transcriptId",
-                "distanceToTSS",
-                "ENSEMBL",
-                "SYMBOL",
-                "GENENAME"
-              )]
+pa <- annotatePeak(
+  peak = peaks,
+  tssRegion = c(-2000, 2000),
+  TxDb = tdb,
+  level = "transcript",
+  genomicAnnotationPriority = c("Promoter", "5UTR", "3UTR", "Exon", "Intron", "Downstream", "Intergenic"),
+  annoDb = adb,
+  sameStrand = FALSE,
+  ignoreOverlap = FALSE,
+  ignoreUpstream = FALSE,
+  ignoreDownstream = FALSE,
+  overlap = "TSS"
+)
+
+padf <- as.data.frame(pa)
+padf$peakID <- paste(padf$seqnames, ":", padf$start, "-", padf$end, sep = "")
+merged <- merge(padf, np, by = "peakID")
+merged <- merged[
+  ,
+  c(
+    "peakID",
+    "chrom",
+    "chromStart",
+    "chromEnd",
+    "width",
+    "annotation",
+    "geneChr",
+    "geneStart",
+    "geneEnd",
+    "geneLength",
+    "geneStrand",
+    "geneId",
+    "transcriptId",
+    "distanceToTSS",
+    "ENSEMBL",
+    "SYMBOL",
+    "GENENAME"
+  )
+]
 # Adding the hash to the first colname
-colnames(merged)=c("#peakID",
-                   "chrom",
-                   "chromStart",
-                   "chromEnd",
-                   "width",
-                   "annotation",
-                   "geneChr",
-                   "geneStart",
-                   "geneEnd",
-                   "geneLength",
-                   "geneStrand",
-                   "geneId",
-                   "transcriptId",
-                   "distanceToTSS",
-                   "ENSEMBL",
-                   "SYMBOL",
-                   "GENENAME")
+colnames(merged) <- c(
+  "#peakID",
+  "chrom",
+  "chromStart",
+  "chromEnd",
+  "width",
+  "annotation",
+  "geneChr",
+  "geneStart",
+  "geneEnd",
+  "geneLength",
+  "geneStrand",
+  "geneId",
+  "transcriptId",
+  "distanceToTSS",
+  "ENSEMBL",
+  "SYMBOL",
+  "GENENAME"
+)
 
 # merge annotation with narrowPeak file
-write.table(merged,args$annotated,sep = "\t",quote = FALSE, row.names = FALSE)
-l=paste("# Median peak width : ",median(merged$width),sep="")
-write(l,args$annotated,append=TRUE)
+write.table(merged, args$annotated, sep = "\t", quote = FALSE, row.names = FALSE)
+l <- paste("# Median peak width : ", median(merged$width), sep = "")
+write(l, args$annotated, append = TRUE)
 
 
 # get promoter genes
 # ... all lines with annotation starting with "Promoter"
-promoters1=dplyr::filter(merged,grepl("Promoter",annotation))
+promoters1 <- dplyr::filter(merged, grepl("Promoter", annotation))
 # ... all lines with annotation is "5' UTR"
-promoters2=merged[merged$annotation=="5' UTR",]
-promoters=rbind(promoters1,promoters2)
-promoters=head(promoters,n=args$toppromoterpeaks)
-promoter_genes=unique(promoters[,c("ENSEMBL","SYMBOL")])
-colnames(promoter_genes)=c("#ENSEMBL","SYMBOL")
-write.table(promoter_genes,args$genelist,sep = "\t",quote = FALSE, row.names = FALSE)
-l=paste("# Median peak width : ",median(promoters$width),sep="")
-write(l,args$genelist,append=TRUE)
+promoters2 <- merged[merged$annotation == "5' UTR", ]
+promoters <- rbind(promoters1, promoters2)
+promoters <- head(promoters, n = args$toppromoterpeaks)
+promoter_genes <- unique(promoters[, c("ENSEMBL", "SYMBOL")])
+colnames(promoter_genes) <- c("#ENSEMBL", "SYMBOL")
+write.table(promoter_genes, args$genelist, sep = "\t", quote = FALSE, row.names = FALSE)
+l <- paste("# Median peak width : ", median(promoters$width), sep = "")
+write(l, args$genelist, append = TRUE)
 
 # annotation type frequence table
 
-l=paste("#annotationType","frequency","medianWidth",sep="\t")
-write(l,args$atypefreq)
-atypes=c("3' UTR",
-         "5' UTR",
-         "Distal Intergenic",
-         "Downstream (<1kb)",
-         "Downstream (1-2kb)",
-         "Downstream (2-3kb)",
-         "Promoter (<=1kb)",
-         "Promoter (1-2kb)")
+l <- paste("#annotationType", "frequency", "medianWidth", sep = "\t")
+write(l, args$atypefreq)
+atypes <- c(
+  "3' UTR",
+  "5' UTR",
+  "Distal Intergenic",
+  "Downstream (<1kb)",
+  "Downstream (1-2kb)",
+  "Downstream (2-3kb)",
+  "Promoter (<=1kb)",
+  "Promoter (1-2kb)"
+)
 for (ann in atypes) {
-  x=merged[merged$annotation==ann,]
-  w=median(x$width)
-  l=paste(gsub(" ","",ann),nrow(x),w,sep="\t")
-  write(l,args$atypefreq,append=TRUE)
+  x <- merged[merged$annotation == ann, ]
+  w <- median(x$width)
+  l <- paste(gsub(" ", "", ann), nrow(x), w, sep = "\t")
+  write(l, args$atypefreq, append = TRUE)
 }
-for (ann in c("Exon","Intron")){
-  x=dplyr::filter(merged,grepl(ann,annotation))
-  w=median(x$width)
-  l=paste(gsub(" ","",ann),nrow(x),w,sep="\t")
-  write(l,args$atypefreq,append=TRUE)
+for (ann in c("Exon", "Intron")) {
+  x <- dplyr::filter(merged, grepl(ann, annotation))
+  w <- median(x$width)
+  l <- paste(gsub(" ", "", ann), nrow(x), w, sep = "\t")
+  write(l, args$atypefreq, append = TRUE)
 }
diff --git a/workflow/scripts/ccbr_annotate_peaks.R b/workflow/scripts/ccbr_annotate_peaks.R
index bf3994c..b5113d8 100644
--- a/workflow/scripts/ccbr_annotate_peaks.R
+++ b/workflow/scripts/ccbr_annotate_peaks.R
@@ -1,4 +1,3 @@
-
 suppressPackageStartupMessages(library("argparse"))
 suppressPackageStartupMessages(library("dplyr"))
 suppressPackageStartupMessages(library("ChIPseeker"))
@@ -21,181 +20,210 @@ parser <- ArgumentParser()
 # specify our desired options
 # by default ArgumentParser will add an help option
 
-parser$add_argument("-n", "--narrowpeak", required=TRUE,
-                    dest="narrowpeak", help="narrowpeak file")
-parser$add_argument("-a", "--annotated", required=TRUE, dest="annotated",
-                    help="annotated output file")
-parser$add_argument("-u", "--uptss", required=FALSE, type="integer", default=2000, help="upstream bases from TSS")
-parser$add_argument("-d", "--downtss", required=FALSE, type="integer", default=2000, help="upstream bases from TSS")
-parser$add_argument("-t", "--toppromoterpeaks", required=FALSE, type="integer", default=1000, help="filter top N peaks in promoters for genelist output")
-parser$add_argument("-l", "--genelist", required=TRUE, help="list of genes with peaks in promoter regions")
-parser$add_argument("-f", "--atypefreq", required=TRUE, help="frequency of different annotation types")
-parser$add_argument("-g", "--genome", required=TRUE, dest="genome",
-                    help="hg38/hg19/mm10/mm9/mmul10/bosTau9")
+parser$add_argument("-n", "--narrowpeak",
+  required = TRUE,
+  dest = "narrowpeak", help = "narrowpeak file"
+)
+parser$add_argument("-a", "--annotated",
+  required = TRUE, dest = "annotated",
+  help = "annotated output file"
+)
+parser$add_argument("-u", "--uptss", required = FALSE, type = "integer", default = 2000, help = "upstream bases from TSS")
+parser$add_argument("-d", "--downtss", required = FALSE, type = "integer", default = 2000, help = "upstream bases from TSS")
+parser$add_argument("-t", "--toppromoterpeaks", required = FALSE, type = "integer", default = 1000, help = "filter top N peaks in promoters for genelist output")
+parser$add_argument("-l", "--genelist", required = TRUE, help = "list of genes with peaks in promoter regions")
+parser$add_argument("-f", "--atypefreq", required = TRUE, help = "frequency of different annotation types")
+parser$add_argument("-g", "--genome",
+  required = TRUE, dest = "genome",
+  help = "hg38/hg19/mm10/mm9/mmul10/bosTau9"
+)
 
 # get command line options, if help option encountered print help and exit,
 # otherwise if options not found on command line then set defaults,
 args <- parser$parse_args()
 
-if (args$genome=="mm9" | args$genome=="mm10"){
-  adb="org.Mm.eg.db"
+if (args$genome == "mm9" | args$genome == "mm10") {
+  adb <- "org.Mm.eg.db"
+}
+if (args$genome == "hg19" | args$genome == "hg38") {
+  adb <- "org.Hs.eg.db"
 }
-if (args$genome=="hg19" | args$genome=="hg38"){
-  adb="org.Hs.eg.db"
+
+if (args$genome == "mmul10") {
+  adb <- "org.Mmu.eg.db"
 }
 
-if (args$genome=="mmul10"){
-  adb="org.Mmu.eg.db"
+if (args$genome == "bosTau9") {
+  adb <- "org.Bt.eg.db"
 }
 
-if (args$genome=="bosTau9"){
-  adb="org.Bt.eg.db"
+if (args$genome == "hg19") {
+  tdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
+}
+if (args$genome == "hg38") {
+  tdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
+}
+if (args$genome == "mm9") {
+  tdb <- TxDb.Mmusculus.UCSC.mm9.knownGene
 }
+if (args$genome == "mm10") {
+  tdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
+}
+if (args$genome == "mmul10") {
+  tdb <- TxDb.Mmulatta.UCSC.rheMac10.refGene
+}
+if (args$genome == "bosTau9") {
+  tdb <- TxDb.Btaurus.UCSC.bosTau9.refGene
+}
+
 
-if (args$genome=="hg19") {tdb=TxDb.Hsapiens.UCSC.hg19.knownGene}
-if (args$genome=="hg38") {tdb=TxDb.Hsapiens.UCSC.hg38.knownGene}
-if (args$genome=="mm9") {tdb=TxDb.Mmusculus.UCSC.mm9.knownGene}
-if (args$genome=="mm10") {tdb=TxDb.Mmusculus.UCSC.mm10.knownGene}
-if (args$genome=="mmul10") {tdb=TxDb.Mmulatta.UCSC.rheMac10.refGene}
-if (args$genome=="bosTau9") {tdb=TxDb.Btaurus.UCSC.bosTau9.refGene}
-
-
-np=read.table(args$narrowpeak,sep="\t")
-np=np[,seq(1,10)]
-colnames(np)=c("chrom",
-               "chromStart",
-               "chromEnd",
-               "name",
-               "score",
-               "strand",
-               "signalValue",
-               "pValue",
-               "qValue",
-               "peak")
-np=np[order(-np$qValue),]
-np$peakID=paste(np$chrom,":",np$chromStart,"-",np$chromEnd,sep="")
-
-peaks=GRanges(seqnames=np$chrom,ranges=IRanges(np$chromStart,np$chromEnd))
+np <- read.table(args$narrowpeak, sep = "\t")
+np <- np[, seq(1, 10)]
+colnames(np) <- c(
+  "chrom",
+  "chromStart",
+  "chromEnd",
+  "name",
+  "score",
+  "strand",
+  "signalValue",
+  "pValue",
+  "qValue",
+  "peak"
+)
+np <- np[order(-np$qValue), ]
+np$peakID <- paste(np$chrom, ":", np$chromStart, "-", np$chromEnd, sep = "")
+
+peaks <- GRanges(seqnames = np$chrom, ranges = IRanges(np$chromStart, np$chromEnd))
 
 # using annotatePeak from ChIPseeker
-pa=annotatePeak(peak = peaks,
-                tssRegion = c(-2000,2000),
-                TxDb = tdb,
-                level = "transcript",
-                genomicAnnotationPriority = c("Promoter", "5UTR", "3UTR", "Exon", "Intron", "Downstream", "Intergenic"),
-                annoDb = adb,
-                sameStrand = FALSE,
-                ignoreOverlap = FALSE,
-                ignoreUpstream = FALSE,
-                ignoreDownstream = FALSE,
-                overlap = "TSS")
-
-padf=as.data.frame(pa)
-padf$peakID=paste(padf$seqnames,":",padf$start,"-",padf$end,sep="")
-merged=merge(padf,np,by="peakID")
-merged=merged[,
-              c("peakID",
-                "chrom",
-                "chromStart",
-                "chromEnd",
-                "width",
-                "annotation",
-                "geneChr",
-                "geneStart",
-                "geneEnd",
-                "geneLength",
-                "geneStrand",
-                "geneId",
-                "transcriptId",
-                "distanceToTSS",
-                "ENSEMBL",
-                "SYMBOL",
-                "GENENAME",
-                "score",
-                "signalValue",
-                "pValue",
-                "qValue",
-                "peak"
-              )]
+pa <- annotatePeak(
+  peak = peaks,
+  tssRegion = c(-2000, 2000),
+  TxDb = tdb,
+  level = "transcript",
+  genomicAnnotationPriority = c("Promoter", "5UTR", "3UTR", "Exon", "Intron", "Downstream", "Intergenic"),
+  annoDb = adb,
+  sameStrand = FALSE,
+  ignoreOverlap = FALSE,
+  ignoreUpstream = FALSE,
+  ignoreDownstream = FALSE,
+  overlap = "TSS"
+)
+
+padf <- as.data.frame(pa)
+padf$peakID <- paste(padf$seqnames, ":", padf$start, "-", padf$end, sep = "")
+merged <- merge(padf, np, by = "peakID")
+merged <- merged[
+  ,
+  c(
+    "peakID",
+    "chrom",
+    "chromStart",
+    "chromEnd",
+    "width",
+    "annotation",
+    "geneChr",
+    "geneStart",
+    "geneEnd",
+    "geneLength",
+    "geneStrand",
+    "geneId",
+    "transcriptId",
+    "distanceToTSS",
+    "ENSEMBL",
+    "SYMBOL",
+    "GENENAME",
+    "score",
+    "signalValue",
+    "pValue",
+    "qValue",
+    "peak"
+  )
+]
 # Adding the hash to the first colname
-colnames(merged)=c("#peakID",
-                   "chrom",
-                   "chromStart",
-                   "chromEnd",
-                   "width",
-                   "annotation",
-                   "geneChr",
-                   "geneStart",
-                   "geneEnd",
-                   "geneLength",
-                   "geneStrand",
-                   "geneId",
-                   "transcriptId",
-                   "distanceToTSS",
-                   "ENSEMBL",
-                   "SYMBOL",
-                   "GENENAME",
-                   "score",
-                   "signalValue",
-                   "pValue",
-                   "qValue",
-                   "peak")
+colnames(merged) <- c(
+  "#peakID",
+  "chrom",
+  "chromStart",
+  "chromEnd",
+  "width",
+  "annotation",
+  "geneChr",
+  "geneStart",
+  "geneEnd",
+  "geneLength",
+  "geneStrand",
+  "geneId",
+  "transcriptId",
+  "distanceToTSS",
+  "ENSEMBL",
+  "SYMBOL",
+  "GENENAME",
+  "score",
+  "signalValue",
+  "pValue",
+  "qValue",
+  "peak"
+)
 
 
 # merge annotation with narrowPeak file
-merged=merged[order(-merged$qValue),]
-write.table(merged,args$annotated,sep = "\t",quote = FALSE, row.names = FALSE)
-l=paste("# Median peak width : ",median(merged$width),sep="")
-write(l,args$annotated,append=TRUE)
-l=paste("# Median pValue : ",median(merged$pValue),sep="")
-write(l,args$annotated,append=TRUE)
-l=paste("# Median qValue : ",median(merged$qValue),sep="")
-write(l,args$annotated,append=TRUE)
+merged <- merged[order(-merged$qValue), ]
+write.table(merged, args$annotated, sep = "\t", quote = FALSE, row.names = FALSE)
+l <- paste("# Median peak width : ", median(merged$width), sep = "")
+write(l, args$annotated, append = TRUE)
+l <- paste("# Median pValue : ", median(merged$pValue), sep = "")
+write(l, args$annotated, append = TRUE)
+l <- paste("# Median qValue : ", median(merged$qValue), sep = "")
+write(l, args$annotated, append = TRUE)
 
 
 # get promoter genes
 # ... all lines with annotation starting with "Promoter"
-promoters1=dplyr::filter(merged,grepl("Promoter",annotation))
+promoters1 <- dplyr::filter(merged, grepl("Promoter", annotation))
 # ... all lines with annotation is "5' UTR"
-promoters2=merged[merged$annotation=="5' UTR",]
-promoters=rbind(promoters1,promoters2)
-promoters=promoters[order(-promoters$qValue),]
-promoters=head(promoters,n=args$toppromoterpeaks)
-promoter_genes=unique(promoters[,c("ENSEMBL","SYMBOL")])
-colnames(promoter_genes)=c("#ENSEMBL","SYMBOL")
-write.table(promoter_genes,args$genelist,sep = "\t",quote = FALSE, row.names = FALSE)
-l=paste("# Median peak width : ",median(promoters$width),sep="")
-write(l,args$genelist,append=TRUE)
-l=paste("# Median pValue : ",median(promoters$pValue),sep="")
-write(l,args$genelist,append=TRUE)
-l=paste("# Median qValue : ",median(promoters$qValue),sep="")
-write(l,args$genelist,append=TRUE)
+promoters2 <- merged[merged$annotation == "5' UTR", ]
+promoters <- rbind(promoters1, promoters2)
+promoters <- promoters[order(-promoters$qValue), ]
+promoters <- head(promoters, n = args$toppromoterpeaks)
+promoter_genes <- unique(promoters[, c("ENSEMBL", "SYMBOL")])
+colnames(promoter_genes) <- c("#ENSEMBL", "SYMBOL")
+write.table(promoter_genes, args$genelist, sep = "\t", quote = FALSE, row.names = FALSE)
+l <- paste("# Median peak width : ", median(promoters$width), sep = "")
+write(l, args$genelist, append = TRUE)
+l <- paste("# Median pValue : ", median(promoters$pValue), sep = "")
+write(l, args$genelist, append = TRUE)
+l <- paste("# Median qValue : ", median(promoters$qValue), sep = "")
+write(l, args$genelist, append = TRUE)
 
 # annotation type frequence table
 
-l=paste("#annotationType","frequency","medianWidth","medianpValue","medianqValue",sep="\t")
-write(l,args$atypefreq)
-atypes=c("3' UTR",
-         "5' UTR",
-         "Distal Intergenic",
-         "Downstream (<1kb)",
-         "Downstream (1-2kb)",
-         "Downstream (2-3kb)",
-         "Promoter (<=1kb)",
-         "Promoter (1-2kb)")
+l <- paste("#annotationType", "frequency", "medianWidth", "medianpValue", "medianqValue", sep = "\t")
+write(l, args$atypefreq)
+atypes <- c(
+  "3' UTR",
+  "5' UTR",
+  "Distal Intergenic",
+  "Downstream (<1kb)",
+  "Downstream (1-2kb)",
+  "Downstream (2-3kb)",
+  "Promoter (<=1kb)",
+  "Promoter (1-2kb)"
+)
 for (ann in atypes) {
-  x=merged[merged$annotation==ann,]
-  w=median(x$width)
-  p=median(x$pValue)
-  q=median(x$qValue)
-  l=paste(gsub(" ","",ann),nrow(x),w,p,q,sep="\t")
-  write(l,args$atypefreq,append=TRUE)
+  x <- merged[merged$annotation == ann, ]
+  w <- median(x$width)
+  p <- median(x$pValue)
+  q <- median(x$qValue)
+  l <- paste(gsub(" ", "", ann), nrow(x), w, p, q, sep = "\t")
+  write(l, args$atypefreq, append = TRUE)
 }
-for (ann in c("Exon","Intron")){
-  x=dplyr::filter(merged,grepl(ann,annotation))
-  w=median(x$width)
-  p=median(x$pValue)
-  q=median(x$qValue)
-  l=paste(gsub(" ","",ann),nrow(x),w,p,q,sep="\t")
-  write(l,args$atypefreq,append=TRUE)
+for (ann in c("Exon", "Intron")) {
+  x <- dplyr::filter(merged, grepl(ann, annotation))
+  w <- median(x$width)
+  p <- median(x$pValue)
+  q <- median(x$qValue)
+  l <- paste(gsub(" ", "", ann), nrow(x), w, p, q, sep = "\t")
+  write(l, args$atypefreq, append = TRUE)
 }
diff --git a/workflow/scripts/fixed_width_peakSets_to_consensus_peakSet.R b/workflow/scripts/fixed_width_peakSets_to_consensus_peakSet.R
index fc49a2d..5e75f70 100644
--- a/workflow/scripts/fixed_width_peakSets_to_consensus_peakSet.R
+++ b/workflow/scripts/fixed_width_peakSets_to_consensus_peakSet.R
@@ -2,239 +2,259 @@
 suppressPackageStartupMessages(library("argparse"))
 
 # create parser object
-parser <- ArgumentParser(description="Convert narrowPeak output to fixedwidth peakset as per the pan-cancer genome paper (https://doi.org/10.1126/science.aav1898) using the script narrowPeak_to_fixed_width_peakSet.R. This involves ...First, all replicate peak sets are combined into a cumulative peak set and trimmed for overlap using the same iterative procedure as narrowPeak_to_fixed_width_peakSet, i.e, keep the most significant (based of score per million or normalized p-value) peak and discards any peak that overlaps directly with the most significant peak. To identify reproducible peaks from this merged peak set, the individual replicate peak sets were overlapped with the merged peak set. Peaks from the merged peak set that were observed in at least 2(minReps) samples with a score per million value >=5(cutoffSPM) were labeled as reproducible and reported in the output narrowPeak file. NOTE: this script requires bedtools to be in the PATH")
+parser <- ArgumentParser(description = "Convert narrowPeak output to fixedwidth peakset as per the pan-cancer genome paper (https://doi.org/10.1126/science.aav1898) using the script narrowPeak_to_fixed_width_peakSet.R. This involves ...First, all replicate peak sets are combined into a cumulative peak set and trimmed for overlap using the same iterative procedure as narrowPeak_to_fixed_width_peakSet, i.e, keep the most significant (based of score per million or normalized p-value) peak and discards any peak that overlaps directly with the most significant peak. To identify reproducible peaks from this merged peak set, the individual replicate peak sets were overlapped with the merged peak set. Peaks from the merged peak set that were observed in at least 2(minReps) samples with a score per million value >=5(cutoffSPM) were labeled as reproducible and reported in the output narrowPeak file. NOTE: this script requires bedtools to be in the PATH")
 
 # specify our desired options
 # by default ArgumentParser will add a help option
 parser$add_argument("-i", "--inputNarrowPeaks",
-          type="character",
-          help="absolute fullpaths to fixed width narrowPeak input files from narrowPeak_to_fixed_width_peakSet.R",
-          required=TRUE)
+  type = "character",
+  help = "absolute fullpaths to fixed width narrowPeak input files from narrowPeak_to_fixed_width_peakSet.R",
+  required = TRUE
+)
 parser$add_argument("-p", "--inputPrefixes",
-          type="character",
-          help="prefixes to be added to the peaknames from the inputNarrowPeaks to avoid name clashes",
-          required=TRUE)
+  type = "character",
+  help = "prefixes to be added to the peaknames from the inputNarrowPeaks to avoid name clashes",
+  required = TRUE
+)
 parser$add_argument("-o", "--outputNarrowPeak",
-          type="character",
-          help="absolute fullpath to narrowPeak output file",
-          required=TRUE)
+  type = "character",
+  help = "absolute fullpath to narrowPeak output file",
+  required = TRUE
+)
 parser$add_argument("-t", "--tmpdir",
-          type="character",
-          help="tmp dir",
-          required=FALSE,
-          default=NULL)
+  type = "character",
+  help = "tmp dir",
+  required = FALSE,
+  default = NULL
+)
 parser$add_argument("-m", "--minReps",
-          type="integer",
-          help="minimum replicates that need to overlap for any peak to be considered a consensus peak",
-          required=FALSE,
-          default=2)
+  type = "integer",
+  help = "minimum replicates that need to overlap for any peak to be considered a consensus peak",
+  required = FALSE,
+  default = 2
+)
 parser$add_argument("-c", "--cutoffSPM",
-          type="integer",
-          help="cutoff score per million or normalized p-value. All minimum number of replicate peaks that need to overlap with a consensus peak should also be greater than this cutoff score per million or SPM",
-          required=FALSE,
-          default=5)
+  type = "integer",
+  help = "cutoff score per million or normalized p-value. All minimum number of replicate peaks that need to overlap with a consensus peak should also be greater than this cutoff score per million or SPM",
+  required = FALSE,
+  default = 5
+)
 
 # parse arguments
 args <- parser$parse_args()
 
 # set thresholds from arguments
-norm_pvalue_threshold=args$cutoffSPM
-min_replicates=args$minReps
+norm_pvalue_threshold <- args$cutoffSPM
+min_replicates <- args$minReps
 
 suppressPackageStartupMessages(library("tidyverse"))
 
-debug=0
+debug <- 0
 
 # split input arguments into lists
-narrowPeaks=unlist(strsplit(args$inputNarrowPeak,","))
-prefixes=unlist(strsplit(args$inputPrefixes,","))
-out_narrowPeak=args$outputNarrowPeak
+narrowPeaks <- unlist(strsplit(args$inputNarrowPeak, ","))
+prefixes <- unlist(strsplit(args$inputPrefixes, ","))
+out_narrowPeak <- args$outputNarrowPeak
 
 # debug mode settings
-if (debug==1){
-  narrowPeaks=unlist(strsplit("/Volumes/Ambs_ATACseq/analysis/project1/CCBR_ATACseq_102621/results/peaks/genrich/fixed_width_peaks/HCC2157_1.genrich.samplePeakSet.narrowPeak,/Volumes/Ambs_ATACseq/analysis/project1/CCBR_ATACseq_102621/results/peaks/genrich/fixed_width_peaks/HCC2157_2.genrich.samplePeakSet.narrowPeak",","))
-  prefixes=unlist(strsplit("HCC2157_1,HCC2157_2",","))
-  out_narrowPeak="HCC2157.genrich.consensus.narrowPeak"
+if (debug == 1) {
+  narrowPeaks <- unlist(strsplit("/Volumes/Ambs_ATACseq/analysis/project1/CCBR_ATACseq_102621/results/peaks/genrich/fixed_width_peaks/HCC2157_1.genrich.samplePeakSet.narrowPeak,/Volumes/Ambs_ATACseq/analysis/project1/CCBR_ATACseq_102621/results/peaks/genrich/fixed_width_peaks/HCC2157_2.genrich.samplePeakSet.narrowPeak", ","))
+  prefixes <- unlist(strsplit("HCC2157_1,HCC2157_2", ","))
+  out_narrowPeak <- "HCC2157.genrich.consensus.narrowPeak"
   setwd(dirname(out_narrowPeak))
-  norm_pvalue_threshold=5
-  min_replicates=2
+  norm_pvalue_threshold <- 5
+  min_replicates <- 2
 } else {
-  if(is.null(args$tmpdir)){
-  tmpdir=setwd(dirname(out_narrowPeak))
+  if (is.null(args$tmpdir)) {
+    tmpdir <- setwd(dirname(out_narrowPeak))
   } else {
-  tmpdir=args$tmpdir
+    tmpdir <- args$tmpdir
   }
   setwd(tmpdir)
 }
 
-bn=basename(out_narrowPeak)
+bn <- basename(out_narrowPeak)
 
 # function to read narrowPeak files
-readnarrowpeak<-function(narrowPeak,prefix){
-  x=read.csv(narrowPeak,
-       header=FALSE,
-       sep = "\t",
-       check.names = FALSE,
-       strip.white = TRUE
+readnarrowpeak <- function(narrowPeak, prefix) {
+  x <- read.csv(narrowPeak,
+    header = FALSE,
+    sep = "\t",
+    check.names = FALSE,
+    strip.white = TRUE
   )
 
-  colnames(x)=c("chrom",
-        "start",
-        "end",
-        "peakname",
-        "score",
-        "strand",
-        "signalValue",
-        "normalizedpvalue",
-        "qvalue",
-        "summitdist")
-  x$peakname=paste(prefix,x$peakname,sep="_")
+  colnames(x) <- c(
+    "chrom",
+    "start",
+    "end",
+    "peakname",
+    "score",
+    "strand",
+    "signalValue",
+    "normalizedpvalue",
+    "qvalue",
+    "summitdist"
+  )
+  x$peakname <- paste(prefix, x$peakname, sep = "_")
   return(x)
 }
 
 # read the first narrowPeak file
-xdf <- readnarrowpeak(narrowPeaks[1],prefixes[1])
+xdf <- readnarrowpeak(narrowPeaks[1], prefixes[1])
 
 # read and combine the rest of the narrowPeak files
 for (i in 2:length(narrowPeaks)) {
-  xdf2=readnarrowpeak(narrowPeaks[i],prefixes[i])
-  xdf=rbind(xdf,xdf2)
+  xdf2 <- readnarrowpeak(narrowPeaks[i], prefixes[i])
+  xdf <- rbind(xdf, xdf2)
 }
 
 # create temporary file names
-tmp1bed=paste(bn,"tmp1.bed",sep=".")
-tmp=paste(bn,"tmp",sep=".")
+tmp1bed <- paste(bn, "tmp1.bed", sep = ".")
+tmp <- paste(bn, "tmp", sep = ".")
 
 # convert columns to integer and sort by normalized p-value
-mutate_at(xdf,vars("start","end","summitdist"),as.integer) -> xdf
-arrange(xdf,desc(normalizedpvalue)) -> xdf
-xdf %>% mutate(rank=row_number()) -> xdf
+mutate_at(xdf, vars("start", "end", "summitdist"), as.integer) -> xdf
+arrange(xdf, desc(normalizedpvalue)) -> xdf
+xdf %>% mutate(rank = row_number()) -> xdf
 
 # write the combined peaks to a temporary file
-write.table(xdf,file=tmp1bed,
-      sep="\t",
-      row.names = FALSE,
-      col.names = FALSE,
-      quote = FALSE)
+write.table(xdf,
+  file = tmp1bed,
+  sep = "\t",
+  row.names = FALSE,
+  col.names = FALSE,
+  quote = FALSE
+)
 
 # run bedtools intersect to find overlapping peaks
-cmd=paste("bedtools intersect -wa -wb -a",tmp1bed,"-b",tmp1bed,">",tmp)
+cmd <- paste("bedtools intersect -wa -wb -a", tmp1bed, "-b", tmp1bed, ">", tmp)
 print(cmd)
 system(cmd)
 
 # read the intersected peaks
-x2=read.csv(tmp,
-       header=FALSE,
-       sep = "\t",
-       check.names = FALSE,
-       strip.white = TRUE
+x2 <- read.csv(tmp,
+  header = FALSE,
+  sep = "\t",
+  check.names = FALSE,
+  strip.white = TRUE
+)
+colnames(x2) <- c(
+  "chrom",
+  "start",
+  "end",
+  "peakname",
+  "score",
+  "strand",
+  "signalValue",
+  "normalizedpvalue",
+  "qvalue",
+  "summitdist",
+  "rank",
+  "chrom2",
+  "start2",
+  "end2",
+  "peakname2",
+  "score2",
+  "strand2",
+  "signalValue2",
+  "normalizedpvalue2",
+  "qvalue2",
+  "summitdist2",
+  "rankb"
 )
-colnames(x2)=c("chrom",
-        "start",
-        "end",
-        "peakname",
-        "score",
-        "strand",
-        "signalValue",
-        "normalizedpvalue",
-        "qvalue",
-        "summitdist",
-        "rank",
-        "chrom2",
-        "start2",
-        "end2",
-        "peakname2",
-        "score2",
-        "strand2",
-        "signalValue2",
-        "normalizedpvalue2",
-        "qvalue2",
-        "summitdist2",
-        "rankb")
 
 # determine which peaks to keep and which to remove
-keep1_ranks=x2[x2$rank==x2$rankb,]$rank
-remove_ranks=x2[x2$rank<x2$rankb,]$rankb
-keep_ranks=setdiff(keep1_ranks,remove_ranks)
+keep1_ranks <- x2[x2$rank == x2$rankb, ]$rank
+remove_ranks <- x2[x2$rank < x2$rankb, ]$rankb
+keep_ranks <- setdiff(keep1_ranks, remove_ranks)
 
 # filter the peaks to keep
-xdf2=xdf[xdf$rank %in% keep_ranks,]
-
-xdf2=xdf2[,c("chrom",
-          "start",
-          "end",
-          "peakname",
-          "score",
-          "strand",
-          "signalValue",
-          "normalizedpvalue",
-          "qvalue",
-          "summitdist")]
+xdf2 <- xdf[xdf$rank %in% keep_ranks, ]
+
+xdf2 <- xdf2[, c(
+  "chrom",
+  "start",
+  "end",
+  "peakname",
+  "score",
+  "strand",
+  "signalValue",
+  "normalizedpvalue",
+  "qvalue",
+  "summitdist"
+)]
 
 # create another temporary file name
-tmp2bed=paste(bn,"tmp2.bed",sep=".")
-mutate_at(xdf2,vars("start","end","summitdist"),as.integer) -> xdf2
+tmp2bed <- paste(bn, "tmp2.bed", sep = ".")
+mutate_at(xdf2, vars("start", "end", "summitdist"), as.integer) -> xdf2
 
 # write the filtered peaks to a temporary file
-write.table(xdf2,file=tmp2bed,
-      sep="\t",
-      row.names = FALSE,
-      col.names = FALSE,
-      quote = FALSE)
+write.table(xdf2,
+  file = tmp2bed,
+  sep = "\t",
+  row.names = FALSE,
+  col.names = FALSE,
+  quote = FALSE
+)
 
 # initialize a data frame to count overlaps
-peaksoverlapcount=data.frame(peakname=xdf2$peakname,count=0)
+peaksoverlapcount <- data.frame(peakname = xdf2$peakname, count = 0)
 
 # count overlaps for each narrowPeak file
 for (i in 1:length(narrowPeaks)) {
-  cmd=paste("bedtools intersect -wa -wb -a",tmp2bed,"-b",narrowPeaks[i],">",tmp)
+  cmd <- paste("bedtools intersect -wa -wb -a", tmp2bed, "-b", narrowPeaks[i], ">", tmp)
   print(cmd)
   system(cmd)
-  x2=read.csv(tmp,
-        header=FALSE,
-        sep = "\t",
-        check.names = FALSE,
-        strip.white = TRUE
+  x2 <- read.csv(tmp,
+    header = FALSE,
+    sep = "\t",
+    check.names = FALSE,
+    strip.white = TRUE
+  )
+  colnames(x2) <- c(
+    "chrom",
+    "start",
+    "end",
+    "peakname",
+    "score",
+    "strand",
+    "signalValue",
+    "normalizedpvalue",
+    "qvalue",
+    "summitdist",
+    "chrom2",
+    "start2",
+    "end2",
+    "peakname2",
+    "score2",
+    "strand2",
+    "signalValue2",
+    "normalizedpvalue2",
+    "qvalue2",
+    "summitdist2"
   )
-  colnames(x2)=c("chrom",
-         "start",
-         "end",
-         "peakname",
-         "score",
-         "strand",
-         "signalValue",
-         "normalizedpvalue",
-         "qvalue",
-         "summitdist",
-         "chrom2",
-         "start2",
-         "end2",
-         "peakname2",
-         "score2",
-         "strand2",
-         "signalValue2",
-         "normalizedpvalue2",
-         "qvalue2",
-         "summitdist2")
-  x2[x2$normalizedpvalue2>norm_pvalue_threshold,]->x2
-  update_peaks=peaksoverlapcount$peakname %in% x2$peakname
+  x2[x2$normalizedpvalue2 > norm_pvalue_threshold, ] -> x2
+  update_peaks <- peaksoverlapcount$peakname %in% x2$peakname
   peaksoverlapcount[update_peaks, "count"] <- 1 + peaksoverlapcount[update_peaks, "count"]
 
-  system(paste("rm -f",tmp))
+  system(paste("rm -f", tmp))
 }
 
 # filter peaks based on overlap count
-keep_peaks=peaksoverlapcount[peaksoverlapcount$count >= min_replicates,]$peakname
-xdf3=xdf2[xdf2$peakname %in% keep_peaks,]
-xdf3$score=peaksoverlapcount[peaksoverlapcount$peakname %in% keep_peaks,]$count
-mutate_at(xdf3,vars("start","end","summitdist"),as.integer) -> xdf3
+keep_peaks <- peaksoverlapcount[peaksoverlapcount$count >= min_replicates, ]$peakname
+xdf3 <- xdf2[xdf2$peakname %in% keep_peaks, ]
+xdf3$score <- peaksoverlapcount[peaksoverlapcount$peakname %in% keep_peaks, ]$count
+mutate_at(xdf3, vars("start", "end", "summitdist"), as.integer) -> xdf3
 
 # write the final consensus peaks to the output file
-write.table(xdf3,file=out_narrowPeak,
-      sep="\t",
-      row.names = FALSE,
-      col.names = FALSE,
-      quote = FALSE)
+write.table(xdf3,
+  file = out_narrowPeak,
+  sep = "\t",
+  row.names = FALSE,
+  col.names = FALSE,
+  quote = FALSE
+)
 
 # clean up temporary files
-system(paste("rm -f",tmp1bed,tmp2bed))
+system(paste("rm -f", tmp1bed, tmp2bed))
diff --git a/workflow/scripts/narrowPeak_normalize_pvalues.R b/workflow/scripts/narrowPeak_normalize_pvalues.R
index 338810b..dda394b 100644
--- a/workflow/scripts/narrowPeak_normalize_pvalues.R
+++ b/workflow/scripts/narrowPeak_normalize_pvalues.R
@@ -2,75 +2,81 @@
 suppressPackageStartupMessages(library("argparse"))
 
 # create parser object
-parser <- ArgumentParser(description="This script can be used to normalize p-values of individual peaks so that replicates and sample with different read-depths and number of peaks can be directly compared with each other as per the pan-cancer genome paper (https://doi.org/10.1126/science.aav1898). To normalize, the peak scores (-log10(p-value)) for each sample were converted to a score per million by dividing each individual peak score by the sum of all of the peak scores in the given sample divided by 1 million.")
+parser <- ArgumentParser(description = "This script can be used to normalize p-values of individual peaks so that replicates and sample with different read-depths and number of peaks can be directly compared with each other as per the pan-cancer genome paper (https://doi.org/10.1126/science.aav1898). To normalize, the peak scores (-log10(p-value)) for each sample were converted to a score per million by dividing each individual peak score by the sum of all of the peak scores in the given sample divided by 1 million.")
 
 # specify our desired options
 # by default ArgumentParser will add an help option
 
 parser$add_argument("-i", "--inputNarrowPeak",
-                    type="character",
-                    help="narrowPeak input file",
-                    required=TRUE)
+  type = "character",
+  help = "narrowPeak input file",
+  required = TRUE
+)
 parser$add_argument("-o", "--outputNarrowPeak",
-                    type="character",
-                    help="narrowPeak output file",
-                    required=FALSE,
-                    default=NULL)
+  type = "character",
+  help = "narrowPeak output file",
+  required = FALSE,
+  default = NULL
+)
 
 
 args <- parser$parse_args()
 suppressPackageStartupMessages(library("tidyverse"))
 
-debug=0
+debug <- 0
 
-narrowPeak=args$inputNarrowPeak
+narrowPeak <- args$inputNarrowPeak
 
-if (debug==1){
-narrowPeak="/Volumes/Ambs_ATACseq/analysis/project1/CCBR_ATACseq_102621/results/peaks/genrich/uniform_width_peaks/HCC2157_1.genrich.narrowPeak"
-out_narrowPeak=gsub("narrowPeak$","pvalue_normalized.narrowPeak",narrowPeak)
-setwd(dirname(narrowPeak))
+if (debug == 1) {
+  narrowPeak <- "/Volumes/Ambs_ATACseq/analysis/project1/CCBR_ATACseq_102621/results/peaks/genrich/uniform_width_peaks/HCC2157_1.genrich.narrowPeak"
+  out_narrowPeak <- gsub("narrowPeak$", "pvalue_normalized.narrowPeak", narrowPeak)
+  setwd(dirname(narrowPeak))
 } else {
-  if(is.null(args$outputNarrowPeak)){
-    out_narrowPeak=gsub("narrowPeak$","pvalue_normalized.narrowPeak",narrowPeak)
+  if (is.null(args$outputNarrowPeak)) {
+    out_narrowPeak <- gsub("narrowPeak$", "pvalue_normalized.narrowPeak", narrowPeak)
   } else {
-    out_narrowPeak=args$outputNarrowPeak
+    out_narrowPeak <- args$outputNarrowPeak
   }
-  if(is.null(args$tmpdir)){
-    tmpdir=setwd(dirname(narrowPeak))
+  if (is.null(args$tmpdir)) {
+    tmpdir <- setwd(dirname(narrowPeak))
   } else {
-    tmpdir=args$tmpdir
+    tmpdir <- args$tmpdir
   }
   setwd(tmpdir)
 }
-bn=basename(narrowPeak)
+bn <- basename(narrowPeak)
 
 
-x=read.csv(narrowPeak,
-           header=FALSE,
-           sep = "\t",
-           check.names = FALSE,
-           strip.white = TRUE
-          )
+x <- read.csv(narrowPeak,
+  header = FALSE,
+  sep = "\t",
+  check.names = FALSE,
+  strip.white = TRUE
+)
 # head(x)
-colnames(x)=c("chrom",
-              "start",
-              "end",
-              "peakname",
-              "score",
-              "strand",
-              "signalValue",
-              "pvalue",
-              "qvalue",
-              "summitdist")
-x$normalized_pvalue=x$pvalue/sum(x$pvalue)*1e6
+colnames(x) <- c(
+  "chrom",
+  "start",
+  "end",
+  "peakname",
+  "score",
+  "strand",
+  "signalValue",
+  "pvalue",
+  "qvalue",
+  "summitdist"
+)
+x$normalized_pvalue <- x$pvalue / sum(x$pvalue) * 1e6
 hist(x$normalized_pvalue)
 summary(x$normalized_pvalue)
-x=arrange(x,desc(normalized_pvalue),desc(signalValue))
+x <- arrange(x, desc(normalized_pvalue), desc(signalValue))
 # head(x)
-x[,c("chrom","start","end","peakname","score","strand","signalValue","normalized_pvalue","qvalue","summitdist")] -> xdf
-mutate_at(xdf,vars("start","end","summitdist"),as.integer) -> xdf
-write.table(xdf,file=out_narrowPeak,
-            sep="\t",
-            row.names = FALSE,
-            col.names = FALSE,
-            quote = FALSE)
+x[, c("chrom", "start", "end", "peakname", "score", "strand", "signalValue", "normalized_pvalue", "qvalue", "summitdist")] -> xdf
+mutate_at(xdf, vars("start", "end", "summitdist"), as.integer) -> xdf
+write.table(xdf,
+  file = out_narrowPeak,
+  sep = "\t",
+  row.names = FALSE,
+  col.names = FALSE,
+  quote = FALSE
+)
diff --git a/workflow/scripts/narrowPeak_to_fixed_width_peakSet.R b/workflow/scripts/narrowPeak_to_fixed_width_peakSet.R
index 4506429..81da1f2 100644
--- a/workflow/scripts/narrowPeak_to_fixed_width_peakSet.R
+++ b/workflow/scripts/narrowPeak_to_fixed_width_peakSet.R
@@ -2,149 +2,163 @@
 suppressPackageStartupMessages(library("argparse"))
 
 # create parser object
-parser <- ArgumentParser(description="Convert narrowPeak output to fixedwidth peakset as per the pan-cancer genome paper (https://doi.org/10.1126/science.aav1898). This involves ... First, the most significant peak is kept and any peak that directly overlaps with that significant peak is removed. Then, this process iterates to the next most significant peak and so on until all peaks have either been kept or removed due to direct overlap with a more significant peak. This prevents the removal of peaks due to daisy chaining or indirect overlap and simultaneously maintains a compendium of fixed-width peaks. The significance of each peak is determined by normalized p-values.  To normalize, the peak scores (-log10(p-value)) for each sample were converted to a score per million by dividing each individual peak score by the sum of all of the peak scores in the given sample divided by 1 million. NOTE: this script requires bedtools to be in the PATH")
+parser <- ArgumentParser(description = "Convert narrowPeak output to fixedwidth peakset as per the pan-cancer genome paper (https://doi.org/10.1126/science.aav1898). This involves ... First, the most significant peak is kept and any peak that directly overlaps with that significant peak is removed. Then, this process iterates to the next most significant peak and so on until all peaks have either been kept or removed due to direct overlap with a more significant peak. This prevents the removal of peaks due to daisy chaining or indirect overlap and simultaneously maintains a compendium of fixed-width peaks. The significance of each peak is determined by normalized p-values.  To normalize, the peak scores (-log10(p-value)) for each sample were converted to a score per million by dividing each individual peak score by the sum of all of the peak scores in the given sample divided by 1 million. NOTE: this script requires bedtools to be in the PATH")
 
 # specify our desired options
 # by default ArgumentParser will add an help option
 
 parser$add_argument("-i", "--inputNarrowPeak",
-          type="character",
-          help="narrowPeak input file absolute full path",
-          required=TRUE)
+  type = "character",
+  help = "narrowPeak input file absolute full path",
+  required = TRUE
+)
 parser$add_argument("-o", "--outputNarrowPeak",
-          type="character",
-          help="narrowPeak output file absolute full path",
-          required=FALSE,
-          default=NULL)
+  type = "character",
+  help = "narrowPeak output file absolute full path",
+  required = FALSE,
+  default = NULL
+)
 parser$add_argument("-t", "--tmpdir",
-          type="character",
-          help="tmp dir",
-          required=FALSE,
-          default=NULL)
+  type = "character",
+  help = "tmp dir",
+  required = FALSE,
+  default = NULL
+)
 parser$add_argument("-w", "--peakWidth",
-          type="integer",
-          help="desired peak width",
-          required=FALSE,
-          default=500)
+  type = "integer",
+  help = "desired peak width",
+  required = FALSE,
+  default = 500
+)
 
 # parse arguments
 args <- parser$parse_args()
-offset <- round(args$peakWidth/2)
+offset <- round(args$peakWidth / 2)
 
 suppressPackageStartupMessages(library("tidyverse"))
 
-debug=0
+debug <- 0
 
-narrowPeak=args$inputNarrowPeak
+narrowPeak <- args$inputNarrowPeak
 
 # Debug mode settings
-if (debug==1){
-  narrowPeak="/Volumes/Ambs_ATACseq/analysis/project1/CCBR_ATACseq_102621/results/peaks/genrich/uniform_width_peaks/HCC2157_1.genrich.narrowPeak"
-  out_narrowPeak=gsub("narrowPeak$","fixed_width.narrowPeak",narrowPeak)
+if (debug == 1) {
+  narrowPeak <- "/Volumes/Ambs_ATACseq/analysis/project1/CCBR_ATACseq_102621/results/peaks/genrich/uniform_width_peaks/HCC2157_1.genrich.narrowPeak"
+  out_narrowPeak <- gsub("narrowPeak$", "fixed_width.narrowPeak", narrowPeak)
   setwd(dirname(narrowPeak))
 } else {
   # Set output file and temporary directory
-  if(is.null(args$outputNarrowPeak)){
-  out_narrowPeak=gsub("narrowPeak$","fixed_width.narrowPeak",narrowPeak)
+  if (is.null(args$outputNarrowPeak)) {
+    out_narrowPeak <- gsub("narrowPeak$", "fixed_width.narrowPeak", narrowPeak)
   } else {
-  out_narrowPeak=args$outputNarrowPeak
+    out_narrowPeak <- args$outputNarrowPeak
   }
-  if(is.null(args$tmpdir)){
-  tmpdir=setwd(dirname(narrowPeak))
+  if (is.null(args$tmpdir)) {
+    tmpdir <- setwd(dirname(narrowPeak))
   } else {
-  tmpdir=args$tmpdir
+    tmpdir <- args$tmpdir
   }
   setwd(tmpdir)
 }
-bn=basename(narrowPeak)
+bn <- basename(narrowPeak)
 
 # Read narrowPeak file
-x=read.csv(narrowPeak,
-       header=FALSE,
-       sep = "\t",
-       check.names = FALSE,
-       strip.white = TRUE
-      )
+x <- read.csv(narrowPeak,
+  header = FALSE,
+  sep = "\t",
+  check.names = FALSE,
+  strip.white = TRUE
+)
 # Set column names
-colnames(x)=c("chrom",
-        "start",
-        "end",
-        "peakname",
-        "score",
-        "strand",
-        "signalValue",
-        "pvalue",
-        "qvalue",
-        "summitdist")
+colnames(x) <- c(
+  "chrom",
+  "start",
+  "end",
+  "peakname",
+  "score",
+  "strand",
+  "signalValue",
+  "pvalue",
+  "qvalue",
+  "summitdist"
+)
 
 # Normalize p-values
-x$normalized_pvalue=x$pvalue/sum(x$pvalue)*1e6
+x$normalized_pvalue <- x$pvalue / sum(x$pvalue) * 1e6
 hist(x$normalized_pvalue)
 
 # Sort peaks by normalized p-value and signal value
-x=arrange(x,desc(normalized_pvalue),desc(signalValue))
+x <- arrange(x, desc(normalized_pvalue), desc(signalValue))
 
 # Calculate new start and end positions
-x$newstart=x$start+x$summitdist-offset
-x$newend=x$start+x$summitdist+offset
-x$newstart<-replace(x$newstart,x$newstart<1,1)
-x %>% mutate(rank=row_number()) -> x
+x$newstart <- x$start + x$summitdist - offset
+x$newend <- x$start + x$summitdist + offset
+x$newstart <- replace(x$newstart, x$newstart < 1, 1)
+x %>% mutate(rank = row_number()) -> x
 
 # Create a dataframe with selected columns
-x[,c("chrom","newstart","newend","peakname","rank","strand")] %>%
-  mutate_at(vars("newstart","newend","rank"),as.integer) -> xdf
+x[, c("chrom", "newstart", "newend", "peakname", "rank", "strand")] %>%
+  mutate_at(vars("newstart", "newend", "rank"), as.integer) -> xdf
 
 # Write temporary bed file
-tmp1bed=paste(bn,"tmp1.bed",sep=".")
-tmp=paste(bn,"tmp",sep=".")
-write.table(xdf,file=tmp1bed,
-      sep="\t",
-      row.names = FALSE,
-      col.names = FALSE,
-      quote = FALSE)
+tmp1bed <- paste(bn, "tmp1.bed", sep = ".")
+tmp <- paste(bn, "tmp", sep = ".")
+write.table(xdf,
+  file = tmp1bed,
+  sep = "\t",
+  row.names = FALSE,
+  col.names = FALSE,
+  quote = FALSE
+)
 
 # Run bedtools intersect command
-cmd=paste("bedtools intersect -wa -wb -a",tmp1bed,"-b",tmp1bed,">",tmp)
+cmd <- paste("bedtools intersect -wa -wb -a", tmp1bed, "-b", tmp1bed, ">", tmp)
 system(cmd)
 
 # Read the intersected file
-x2=read.csv(tmp,
-       header=FALSE,
-       sep = "\t",
-       check.names = FALSE,
-       strip.white = TRUE
+x2 <- read.csv(tmp,
+  header = FALSE,
+  sep = "\t",
+  check.names = FALSE,
+  strip.white = TRUE
+)
+colnames(x2) <- c(
+  "chrom", "newstart", "newend", "peakname", "rank", "strand",
+  "chromb", "newstartb", "newendb", "peaknameb", "rankb", "strandb"
 )
-colnames(x2)=c("chrom","newstart","newend","peakname","rank","strand",
-         "chromb","newstartb","newendb","peaknameb","rankb","strandb")
 
 # Determine which peaks to keep
-keep1_ranks=x2[x2$rank==x2$rankb,]$rank
-remove_ranks=x2[x2$rank<x2$rankb,]$rankb
-keep_ranks=setdiff(keep1_ranks,remove_ranks)
+keep1_ranks <- x2[x2$rank == x2$rankb, ]$rank
+remove_ranks <- x2[x2$rank < x2$rankb, ]$rankb
+keep_ranks <- setdiff(keep1_ranks, remove_ranks)
 
 # Filter peaks
-xdf_filtered=xdf[xdf$rank %in% keep_ranks,]
-x_filtered=x[x$peakname %in% xdf_filtered$peakname,]
+xdf_filtered <- xdf[xdf$rank %in% keep_ranks, ]
+x_filtered <- x[x$peakname %in% xdf_filtered$peakname, ]
 
 # Remove temporary files
-system(paste("rm -f",tmp1bed,tmp))
+system(paste("rm -f", tmp1bed, tmp))
 
 # Prepare final dataframe
-xdf=x_filtered[c("chrom",
-         "newstart",
-         "newend",
-         "peakname",
-         "score",
-         "strand",
-         "signalValue",
-         "normalized_pvalue",
-         "qvalue")]
-xdf$summitdist=offset
-mutate_at(xdf,vars("newstart","newend","summitdist"),as.integer) -> xdf
+xdf <- x_filtered[c(
+  "chrom",
+  "newstart",
+  "newend",
+  "peakname",
+  "score",
+  "strand",
+  "signalValue",
+  "normalized_pvalue",
+  "qvalue"
+)]
+xdf$summitdist <- offset
+mutate_at(xdf, vars("newstart", "newend", "summitdist"), as.integer) -> xdf
 
 # Write final output file
-write.table(xdf,file=out_narrowPeak,
-      sep="\t",
-      row.names = FALSE,
-      col.names = FALSE,
-      quote = FALSE)
+write.table(xdf,
+  file = out_narrowPeak,
+  sep = "\t",
+  row.names = FALSE,
+  col.names = FALSE,
+  quote = FALSE
+)
diff --git a/workflow/scripts/pairwise.Rmd b/workflow/scripts/pairwise.Rmd
index b3abfd1..73a4957 100644
--- a/workflow/scripts/pairwise.Rmd
+++ b/workflow/scripts/pairwise.Rmd
@@ -15,28 +15,30 @@ suppressMessages(library(plotly))
 suppressMessages(library(RColorBrewer))
 suppressMessages(library(reshape2))
 # list.files(path = ".")
-e=read.csv(params$inputfilelist,header=FALSE,sep="\t",stringsAsFactors = FALSE)
-colnames(e)=c("replicate","sample","file")
-rownames(e)=e$replicate
+e <- read.csv(params$inputfilelist, header = FALSE, sep = "\t", stringsAsFactors = FALSE)
+colnames(e) <- c("replicate", "sample", "file")
+rownames(e) <- e$replicate
 ```
 
 
 ```{r pca, echo=FALSE, message=FALSE, warning=FALSE}
-d=read.csv(params$pairwise,header=FALSE,sep="\t")
-m=dcast(d,V1~V2,value.var="V3")
-colnames_m=colnames(m)
-colnames_m[1]=""
-colnames(m)=colnames_m
-m1 = m
-rownames(m1)=m1[,1]
-m1[,1] = NULL
-m.pca=prcomp(m1,scale. = T)
-m.pc1 = round(m.pca$sdev[1]^2/sum(m.pca$sdev^2)*100,2)
-m.pc2 = round(m.pca$sdev[2]^2/sum(m.pca$sdev^2)*100,2)
-m.pc3 = round(m.pca$sdev[3]^2/sum(m.pca$sdev^2)*100,2)
-#color by sample
-h=c()
-for (i in rownames(m.pca$rotation)) {h=c(h,e$sample[e$replicate==i])}
+d <- read.csv(params$pairwise, header = FALSE, sep = "\t")
+m <- dcast(d, V1 ~ V2, value.var = "V3")
+colnames_m <- colnames(m)
+colnames_m[1] <- ""
+colnames(m) <- colnames_m
+m1 <- m
+rownames(m1) <- m1[, 1]
+m1[, 1] <- NULL
+m.pca <- prcomp(m1, scale. = T)
+m.pc1 <- round(m.pca$sdev[1]^2 / sum(m.pca$sdev^2) * 100, 2)
+m.pc2 <- round(m.pca$sdev[2]^2 / sum(m.pca$sdev^2) * 100, 2)
+m.pc3 <- round(m.pca$sdev[3]^2 / sum(m.pca$sdev^2) * 100, 2)
+# color by sample
+h <- c()
+for (i in rownames(m.pca$rotation)) {
+  h <- c(h, e$sample[e$replicate == i])
+}
 ```
 \newline
 
@@ -45,18 +47,24 @@ for (i in rownames(m.pca$rotation)) {h=c(h,e$sample[e$replicate==i])}
 \newline
 
 ```{r plot3d, echo=FALSE, message=FALSE, warning=FALSE}
-if (ncol(m.pca$rotation)==2){
+if (ncol(m.pca$rotation) == 2) {
   paste0("Only 2 Components. 3-d not possible")
 } else {
-toplot=as.data.frame(m.pca$x[,1:3])
-p <- plot_ly(as.data.frame(m.pca$x[,1:3]), x = ~PC1, y = ~PC2, z = ~PC3,  hoverinfo="text", color=h,
-             hovertext = rownames(toplot)) %>%
-  add_markers() %>%
-  layout(title = "Jaccard score 3-d PCA plot",
-         scene = list(xaxis = list(title = paste0("PC1 (",m.pc1,"%)")),
-                      yaxis = list(title = paste0("PC2 (",m.pc2,"%)")),
-                      zaxis = list(title = paste0("PC3 (",m.pc3,"%)"))))
-p
+  toplot <- as.data.frame(m.pca$x[, 1:3])
+  p <- plot_ly(as.data.frame(m.pca$x[, 1:3]),
+    x = ~PC1, y = ~PC2, z = ~PC3, hoverinfo = "text", color = h,
+    hovertext = rownames(toplot)
+  ) %>%
+    add_markers() %>%
+    layout(
+      title = "Jaccard score 3-d PCA plot",
+      scene = list(
+        xaxis = list(title = paste0("PC1 (", m.pc1, "%)")),
+        yaxis = list(title = paste0("PC2 (", m.pc2, "%)")),
+        zaxis = list(title = paste0("PC3 (", m.pc3, "%)"))
+      )
+    )
+  p
 }
 ```
 \newline
@@ -66,15 +74,17 @@ p
 \newline
 
 ```{r plot, echo=FALSE, message=FALSE, warning=FALSE}
-toplot=as.data.frame(m.pca$x[,1:2])
-x <- list (
-  title = paste0("PC1 (",m.pc1,"%)")
+toplot <- as.data.frame(m.pca$x[, 1:2])
+x <- list(
+  title = paste0("PC1 (", m.pc1, "%)")
 )
-y <- list (
-  title = paste0("PC2 (",m.pc2,"%)")
+y <- list(
+  title = paste0("PC2 (", m.pc2, "%)")
 )
-p <- plot_ly(as.data.frame(m.pca$x[,1:2]), x = ~PC1, y = ~PC2,  hoverinfo="text", color=h,
-             hovertext = rownames(toplot)) %>%
+p <- plot_ly(as.data.frame(m.pca$x[, 1:2]),
+  x = ~PC1, y = ~PC2, hoverinfo = "text", color = h,
+  hovertext = rownames(toplot)
+) %>%
   add_markers() %>%
   layout(title = "Jaccard score 2-d PCA plot", xaxis = x, yaxis = y)
 p