CCBR · pre-commit-ci · Mar 9, 2026 · Mar 9, 2026
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -8,36 +8,36 @@ exclude: |
    )
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v1.2.3
+    rev: v6.0.0
     hooks:
       - id: check-added-large-files
       - id: end-of-file-fixer
       - id: trailing-whitespace
       - id: check-json
   # spell check
   - repo: https://github.com/codespell-project/codespell
-    rev: v2.2.4
+    rev: v2.4.2
     hooks:
       - id: codespell
         # https://github.com/codespell-project/codespell/issues/1498
   # Python formatting
-  - repo: https://github.com/psf/black
-    rev: 23.7.0
+  - repo: https://github.com/psf/black-pre-commit-mirror
+    rev: 26.3.0
     hooks:
       - id: black
   # R formatting
   - repo: https://github.com/lorenzwalthert/precommit
-    rev: v0.1.2
+    rev: v0.4.3.9021
     hooks:
       - id: style-files
   # general linting
   - repo: https://github.com/pre-commit/mirrors-prettier
-    rev: v2.7.1
+    rev: v4.0.0-alpha.8
     hooks:
       - id: prettier
   # enforce commit format
   - repo: https://github.com/compilerla/conventional-pre-commit
-    rev: v2.3.0
+    rev: v4.4.0
     hooks:
       - id: conventional-pre-commit
         stages: [commit-msg]

diff --git a/.tests/README.md b/.tests/README.md
@@ -3,4 +3,3 @@
 These input files are used for continuous integration purposes, specificially to dry run the pipeline whenever commits have been made to the main, master, or unified branches.
 
 **Please Note:** Each of the provided FastQ files and BAM files have only headers and will not work for the LOGAN pipeline
-
diff --git a/.tests/pairs.tsv b/.tests/pairs.tsv
@@ -1,2 +1,2 @@
 Tumor	Normal
-WGS_NC_T	WGS_NC_N	
+WGS_NC_T	WGS_NC_N
diff --git a/README.md b/README.md
@@ -85,7 +85,7 @@ Sample1_NORMAL  /path/to/the/BAM/folder/Sample1_NORMAL.bam  /path/to/the/BAM/fol
 
 ### Genome
 
-`--genome` - A flag to indicate which genome to run. hg38, hg19 and mm10 are supported.  
+`--genome` - A flag to indicate which genome to run. hg38, hg19 and mm10 are supported.
 Example: `--genome hg38` to run the hg38 genome
 
 `--genome hg19` and `--genome mm10` are also supported
@@ -154,7 +154,7 @@ Example of Tumor_Normal calling mode
 # Step 0: Set up
 
 sinteractive --mem=8g -N 1 -n 4
-module load ccbrpipeliner # v8 
+module load ccbrpipeliner # v8
 
 # set up directories
 

diff --git a/bin/ascat.R b/bin/ascat.R
@@ -2,36 +2,37 @@
 
 #######
 #
-#R Script for ASCAT
+# R Script for ASCAT
 #
 ######
 library(ASCAT)
 library(RColorBrewer)
 
-args = commandArgs(trailingOnly=TRUE)
-tumor_bam=args[1]
-tumor_name=args[2]
-normal_bam=args[3]
-normal_name=args[4]
-genome=args[5]
-bed=args[6]
-exome=args[7]
-#chroms=scan(text=args[4],sep=",",quiet=T)
-cpus=as.numeric(Sys.getenv("SLURM_CPUS_PER_TASK"))
-cpus=ifelse(is.na(cpus),2,cpus)
-
-if (exists(exome)){
-  genomebasedir=sprintf("/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/%s/ASCAT/WES",genome)
-}else{
-  genomebasedir=sprintf("/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/%s/ASCAT",genome)
+args <- commandArgs(trailingOnly = TRUE)
+tumor_bam <- args[1]
+tumor_name <- args[2]
+normal_bam <- args[3]
+normal_name <- args[4]
+genome <- args[5]
+bed <- args[6]
+exome <- args[7]
+# chroms=scan(text=args[4],sep=",",quiet=T)
+cpus <- as.numeric(Sys.getenv("SLURM_CPUS_PER_TASK"))
+cpus <- ifelse(is.na(cpus), 2, cpus)
 
+if (exists(exome)) {
+  genomebasedir <- sprintf("/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/%s/ASCAT/WES", genome)
+} else {
+  genomebasedir <- sprintf("/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/%s/ASCAT", genome)
 }
 
-##DETERMINE SEX
-system(sprintf('alleleCounter -l %s/gender_chr.loci -b %s -c chrX -o %s_temp_gender.out',
-  genomebasedir,normal_bam,normal_name))
-s=read.table(sprintf("%s_temp_gender.out",normal_name))
-gender=ifelse(sum(s$V7)>5,"XY","XX")
+## DETERMINE SEX
+system(sprintf(
+  "alleleCounter -l %s/gender_chr.loci -b %s -c chrX -o %s_temp_gender.out",
+  genomebasedir, normal_bam, normal_name
+))
+s <- read.table(sprintf("%s_temp_gender.out", normal_name))
+gender <- ifelse(sum(s$V7) > 5, "XY", "XX")
 print(gender)
 
 ascat.prepareHTS(
@@ -40,30 +41,34 @@ ascat.prepareHTS(
   tumourname = tumor_name,
   normalname = normal_name,
   allelecounter_exe = "alleleCounter",
-  alleles.prefix = sprintf("%s/G1000_alleles/G1000_alleles_%s_chr",genomebasedir,genome),
-  loci.prefix = sprintf("%s/G1000_loci/G1000_loci_%s_chr",genomebasedir,genome),
+  alleles.prefix = sprintf("%s/G1000_alleles/G1000_alleles_%s_chr", genomebasedir, genome),
+  loci.prefix = sprintf("%s/G1000_loci/G1000_loci_%s_chr", genomebasedir, genome),
   gender = gender,
   genomeVersion = genome,
   nthreads = cpus,
-  tumourLogR_file = sprintf("%s_LogR.txt",tumor_name),
-  tumourBAF_file = sprintf("%s_BAF.txt",tumor_name),
-  normalLogR_file = sprintf("%s_LogR.txt",normal_name),
-  normalBAF_file = sprintf("%s_BAF.txt",normal_name),
-  BED_file=bed)
+  tumourLogR_file = sprintf("%s_LogR.txt", tumor_name),
+  tumourBAF_file = sprintf("%s_BAF.txt", tumor_name),
+  normalLogR_file = sprintf("%s_LogR.txt", normal_name),
+  normalBAF_file = sprintf("%s_BAF.txt", normal_name),
+  BED_file = bed
+)
 
-ascat.bc = ascat.loadData(Tumor_LogR_file = sprintf("%s_LogR.txt",tumor_name), 
-    Tumor_BAF_file = sprintf("%s_BAF.txt",tumor_name), 
-    Germline_LogR_file = sprintf("%s_LogR.txt",normal_name), Germline_BAF_file = sprintf("%s_BAF.txt",normal_name), 
-    gender = gender, genomeVersion = genome)
+ascat.bc <- ascat.loadData(
+  Tumor_LogR_file = sprintf("%s_LogR.txt", tumor_name),
+  Tumor_BAF_file = sprintf("%s_BAF.txt", tumor_name),
+  Germline_LogR_file = sprintf("%s_LogR.txt", normal_name), Germline_BAF_file = sprintf("%s_BAF.txt", normal_name),
+  gender = gender, genomeVersion = genome
+)
 
 ascat.plotRawData(ascat.bc, img.prefix = "Before_correction_")
-ascat.bc = ascat.correctLogR(ascat.bc, 
-  GCcontentfile = sprintf("%s/GC_G1000/GC_G1000_%s.txt",genomebasedir,genome), 
-  replictimingfile = sprintf("%s/RT_G1000/RT_G1000_%s.txt",genomebasedir,genome))
+ascat.bc <- ascat.correctLogR(ascat.bc,
+  GCcontentfile = sprintf("%s/GC_G1000/GC_G1000_%s.txt", genomebasedir, genome),
+  replictimingfile = sprintf("%s/RT_G1000/RT_G1000_%s.txt", genomebasedir, genome)
+)
 ascat.plotRawData(ascat.bc, img.prefix = "After_correction_")
-ascat.bc = ascat.aspcf(ascat.bc)
+ascat.bc <- ascat.aspcf(ascat.bc)
 ascat.plotSegmentedData(ascat.bc)
-ascat.output = ascat.runAscat(ascat.bc, gamma=1, write_segments = T)
-QC = ascat.metrics(ascat.bc,ascat.output)
-write.table(QC,sprintf("%s.qc.txt",paste0(tumor_name,"_vs_",normal_name)))
-save(ascat.bc, ascat.output, QC, file = sprintf('%s_vs_%s_ascat.Rdata',tumor_name,normal_name))
+ascat.output <- ascat.runAscat(ascat.bc, gamma = 1, write_segments = T)
+QC <- ascat.metrics(ascat.bc, ascat.output)
+write.table(QC, sprintf("%s.qc.txt", paste0(tumor_name, "_vs_", normal_name)))
+save(ascat.bc, ascat.output, QC, file = sprintf("%s_vs_%s_ascat.Rdata", tumor_name, normal_name))
diff --git a/bin/assess_significance.R b/bin/assess_significance.R
@@ -1,59 +1,57 @@
-#!/usr/bin/env Rscript
-
-library(rtracklayer)
-
-args <- commandArgs()
-
-dataTable <-read.table(args[5], header=TRUE);
-ratio<-data.frame(dataTable)
-
-dataTable <- read.table(args[4], header=FALSE)
-cnvs<- data.frame(dataTable)
-
-ratio$Ratio[which(ratio$Ratio==-1)]=NA
-
-cnvs.bed=GRanges(cnvs[,1],IRanges(cnvs[,2],cnvs[,3]))  
-ratio.bed=GRanges(ratio$Chromosome,IRanges(ratio$Start,ratio$Start),score=ratio$Ratio)
-
-overlaps <- subsetByOverlaps(ratio.bed,cnvs.bed)
-normals <- setdiff(ratio.bed,cnvs.bed)
-normals <- subsetByOverlaps(ratio.bed,normals)
-
-#mu <- mean(score(normals),na.rm=TRUE)
-#sigma<- sd(score(normals),na.rm=TRUE)
-
-#hist(score(normals),n=500,xlim=c(0,2))
-#hist(log(score(normals)),n=500,xlim=c(-1,1))
-
-#shapiro.test(score(normals)[which(!is.na(score(normals)))][5001:10000])
-#qqnorm (score(normals)[which(!is.na(score(normals)))],ylim=(c(0,10)))
-#qqline(score(normals)[which(!is.na(score(normals)))], col = 2)
-
-#shapiro.test(log(score(normals))[which(!is.na(score(normals)))][5001:10000])
-#qqnorm (log(score(normals))[which(!is.na(score(normals)))],ylim=(c(-6,10)))
-#qqline(log(score(normals))[which(!is.na(score(normals)))], col = 2)
-
-numberOfCol=length(cnvs)
-wscore=c()
-kscore=c()
-for (i in c(1:length(cnvs[,1]))) {
-values <- score(subsetByOverlaps(ratio.bed,cnvs.bed[i]))
-resultw <- class(try(wilcox.test(values,score(normals)), silent = TRUE))
-ifelse(resultw == "try-error", wscore <- c(wscore, "NA"), wscore <- c(wscore, wilcox.test(values,score(normals))$p.value))
-resultks <- class(try(ks.test(values,score(normals)), silent = TRUE))
-ifelse(resultks == "try-error",kscore <- c(kscore, "NA"),kscore <- c(kscore, ks.test(values,score(normals))$p.value))
-}
-cnvs = cbind(cnvs, as.numeric(wscore), as.numeric(kscore))
-
-if (numberOfCol==7) {
-  names(cnvs)=c("chr","start","end","copy number","status","WilcoxonRankSumTestPvalue","KolmogorovSmirnovPvalue")  
-}
-if (numberOfCol==9) {
-  names(cnvs)=c("chr","start","end","copy number","status","genotype","uncertainty","WilcoxonRankSumTestPvalue","KolmogorovSmirnovPvalue")  
-}
-if (numberOfCol==11) {
-  names(cnvs)=c("chr","start","end","copy number","status","genotype","uncertainty","somatic/germline","precentageOfGermline","WilcoxonRankSumTestPvalue","KolmogorovSmirnovPvalue")  
-}
-write.table(cnvs, file=paste(args[4],".p.value.txt",sep=""),sep="\t",quote=F,row.names=F)
-
-
+#!/usr/bin/env Rscript
+
+library(rtracklayer)
+
+args <- commandArgs()
+
+dataTable <- read.table(args[5], header = TRUE)
+ratio <- data.frame(dataTable)
+
+dataTable <- read.table(args[4], header = FALSE)
+cnvs <- data.frame(dataTable)
+
+ratio$Ratio[which(ratio$Ratio == -1)] <- NA
+
+cnvs.bed <- GRanges(cnvs[, 1], IRanges(cnvs[, 2], cnvs[, 3]))
+ratio.bed <- GRanges(ratio$Chromosome, IRanges(ratio$Start, ratio$Start), score = ratio$Ratio)
+
+overlaps <- subsetByOverlaps(ratio.bed, cnvs.bed)
+normals <- setdiff(ratio.bed, cnvs.bed)
+normals <- subsetByOverlaps(ratio.bed, normals)
+
+# mu <- mean(score(normals),na.rm=TRUE)
+# sigma<- sd(score(normals),na.rm=TRUE)
+
+# hist(score(normals),n=500,xlim=c(0,2))
+# hist(log(score(normals)),n=500,xlim=c(-1,1))
+
+# shapiro.test(score(normals)[which(!is.na(score(normals)))][5001:10000])
+# qqnorm (score(normals)[which(!is.na(score(normals)))],ylim=(c(0,10)))
+# qqline(score(normals)[which(!is.na(score(normals)))], col = 2)
+
+# shapiro.test(log(score(normals))[which(!is.na(score(normals)))][5001:10000])
+# qqnorm (log(score(normals))[which(!is.na(score(normals)))],ylim=(c(-6,10)))
+# qqline(log(score(normals))[which(!is.na(score(normals)))], col = 2)
+
+numberOfCol <- length(cnvs)
+wscore <- c()
+kscore <- c()
+for (i in c(1:length(cnvs[, 1]))) {
+  values <- score(subsetByOverlaps(ratio.bed, cnvs.bed[i]))
+  resultw <- class(try(wilcox.test(values, score(normals)), silent = TRUE))
+  ifelse(resultw == "try-error", wscore <- c(wscore, "NA"), wscore <- c(wscore, wilcox.test(values, score(normals))$p.value))
+  resultks <- class(try(ks.test(values, score(normals)), silent = TRUE))
+  ifelse(resultks == "try-error", kscore <- c(kscore, "NA"), kscore <- c(kscore, ks.test(values, score(normals))$p.value))
+}
+cnvs <- cbind(cnvs, as.numeric(wscore), as.numeric(kscore))
+
+if (numberOfCol == 7) {
+  names(cnvs) <- c("chr", "start", "end", "copy number", "status", "WilcoxonRankSumTestPvalue", "KolmogorovSmirnovPvalue")
+}
+if (numberOfCol == 9) {
+  names(cnvs) <- c("chr", "start", "end", "copy number", "status", "genotype", "uncertainty", "WilcoxonRankSumTestPvalue", "KolmogorovSmirnovPvalue")
+}
+if (numberOfCol == 11) {
+  names(cnvs) <- c("chr", "start", "end", "copy number", "status", "genotype", "uncertainty", "somatic/germline", "precentageOfGermline", "WilcoxonRankSumTestPvalue", "KolmogorovSmirnovPvalue")
+}
+write.table(cnvs, file = paste(args[4], ".p.value.txt", sep = ""), sep = "\t", quote = F, row.names = F)
Original file line number	Diff line number	Diff line change
Expand Up		@@ -3,4 +3,3 @@
		These input files are used for continuous integration purposes, specificially to dry run the pipeline whenever commits have been made to the main, master, or unified branches.

		Please Note: Each of the provided FastQ files and BAM files have only headers and will not work for the LOGAN pipeline