Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,36 +8,36 @@ exclude: |
)
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v1.2.3
rev: v6.0.0
hooks:
- id: check-added-large-files
- id: end-of-file-fixer
- id: trailing-whitespace
- id: check-json
# spell check
- repo: https://github.com/codespell-project/codespell
rev: v2.2.4
rev: v2.4.2
hooks:
- id: codespell
# https://github.com/codespell-project/codespell/issues/1498
# Python formatting
- repo: https://github.com/psf/black
rev: 23.7.0
- repo: https://github.com/psf/black-pre-commit-mirror
rev: 26.3.0
hooks:
- id: black
# R formatting
- repo: https://github.com/lorenzwalthert/precommit
rev: v0.1.2
rev: v0.4.3.9021
hooks:
- id: style-files
# general linting
- repo: https://github.com/pre-commit/mirrors-prettier
rev: v2.7.1
rev: v4.0.0-alpha.8
hooks:
- id: prettier
# enforce commit format
- repo: https://github.com/compilerla/conventional-pre-commit
rev: v2.3.0
rev: v4.4.0
hooks:
- id: conventional-pre-commit
stages: [commit-msg]
Expand Down
1 change: 0 additions & 1 deletion .tests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,3 @@
These input files are used for continuous integration purposes, specificially to dry run the pipeline whenever commits have been made to the main, master, or unified branches.

**Please Note:** Each of the provided FastQ files and BAM files have only headers and will not work for the LOGAN pipeline

2 changes: 1 addition & 1 deletion .tests/pairs.tsv
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Tumor Normal
WGS_NC_T WGS_NC_N
WGS_NC_T WGS_NC_N
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ Sample1_NORMAL /path/to/the/BAM/folder/Sample1_NORMAL.bam /path/to/the/BAM/fol

### Genome

`--genome` - A flag to indicate which genome to run. hg38, hg19 and mm10 are supported.
`--genome` - A flag to indicate which genome to run. hg38, hg19 and mm10 are supported.
Example: `--genome hg38` to run the hg38 genome

`--genome hg19` and `--genome mm10` are also supported
Expand Down Expand Up @@ -154,7 +154,7 @@ Example of Tumor_Normal calling mode
# Step 0: Set up

sinteractive --mem=8g -N 1 -n 4
module load ccbrpipeliner # v8
module load ccbrpipeliner # v8

# set up directories

Expand Down
87 changes: 46 additions & 41 deletions bin/ascat.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,36 +2,37 @@

#######
#
#R Script for ASCAT
# R Script for ASCAT
#
######
library(ASCAT)
library(RColorBrewer)

args = commandArgs(trailingOnly=TRUE)
tumor_bam=args[1]
tumor_name=args[2]
normal_bam=args[3]
normal_name=args[4]
genome=args[5]
bed=args[6]
exome=args[7]
#chroms=scan(text=args[4],sep=",",quiet=T)
cpus=as.numeric(Sys.getenv("SLURM_CPUS_PER_TASK"))
cpus=ifelse(is.na(cpus),2,cpus)

if (exists(exome)){
genomebasedir=sprintf("/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/%s/ASCAT/WES",genome)
}else{
genomebasedir=sprintf("/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/%s/ASCAT",genome)
args <- commandArgs(trailingOnly = TRUE)
tumor_bam <- args[1]
tumor_name <- args[2]
normal_bam <- args[3]
normal_name <- args[4]
genome <- args[5]
bed <- args[6]
exome <- args[7]
# chroms=scan(text=args[4],sep=",",quiet=T)
cpus <- as.numeric(Sys.getenv("SLURM_CPUS_PER_TASK"))
cpus <- ifelse(is.na(cpus), 2, cpus)

if (exists(exome)) {
genomebasedir <- sprintf("/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/%s/ASCAT/WES", genome)
} else {
genomebasedir <- sprintf("/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/%s/ASCAT", genome)
}

##DETERMINE SEX
system(sprintf('alleleCounter -l %s/gender_chr.loci -b %s -c chrX -o %s_temp_gender.out',
genomebasedir,normal_bam,normal_name))
s=read.table(sprintf("%s_temp_gender.out",normal_name))
gender=ifelse(sum(s$V7)>5,"XY","XX")
## DETERMINE SEX
system(sprintf(
"alleleCounter -l %s/gender_chr.loci -b %s -c chrX -o %s_temp_gender.out",
genomebasedir, normal_bam, normal_name
))
s <- read.table(sprintf("%s_temp_gender.out", normal_name))
gender <- ifelse(sum(s$V7) > 5, "XY", "XX")
print(gender)

ascat.prepareHTS(
Expand All @@ -40,30 +41,34 @@ ascat.prepareHTS(
tumourname = tumor_name,
normalname = normal_name,
allelecounter_exe = "alleleCounter",
alleles.prefix = sprintf("%s/G1000_alleles/G1000_alleles_%s_chr",genomebasedir,genome),
loci.prefix = sprintf("%s/G1000_loci/G1000_loci_%s_chr",genomebasedir,genome),
alleles.prefix = sprintf("%s/G1000_alleles/G1000_alleles_%s_chr", genomebasedir, genome),
loci.prefix = sprintf("%s/G1000_loci/G1000_loci_%s_chr", genomebasedir, genome),
gender = gender,
genomeVersion = genome,
nthreads = cpus,
tumourLogR_file = sprintf("%s_LogR.txt",tumor_name),
tumourBAF_file = sprintf("%s_BAF.txt",tumor_name),
normalLogR_file = sprintf("%s_LogR.txt",normal_name),
normalBAF_file = sprintf("%s_BAF.txt",normal_name),
BED_file=bed)
tumourLogR_file = sprintf("%s_LogR.txt", tumor_name),
tumourBAF_file = sprintf("%s_BAF.txt", tumor_name),
normalLogR_file = sprintf("%s_LogR.txt", normal_name),
normalBAF_file = sprintf("%s_BAF.txt", normal_name),
BED_file = bed
)

ascat.bc = ascat.loadData(Tumor_LogR_file = sprintf("%s_LogR.txt",tumor_name),
Tumor_BAF_file = sprintf("%s_BAF.txt",tumor_name),
Germline_LogR_file = sprintf("%s_LogR.txt",normal_name), Germline_BAF_file = sprintf("%s_BAF.txt",normal_name),
gender = gender, genomeVersion = genome)
ascat.bc <- ascat.loadData(
Tumor_LogR_file = sprintf("%s_LogR.txt", tumor_name),
Tumor_BAF_file = sprintf("%s_BAF.txt", tumor_name),
Germline_LogR_file = sprintf("%s_LogR.txt", normal_name), Germline_BAF_file = sprintf("%s_BAF.txt", normal_name),
gender = gender, genomeVersion = genome
)

ascat.plotRawData(ascat.bc, img.prefix = "Before_correction_")
ascat.bc = ascat.correctLogR(ascat.bc,
GCcontentfile = sprintf("%s/GC_G1000/GC_G1000_%s.txt",genomebasedir,genome),
replictimingfile = sprintf("%s/RT_G1000/RT_G1000_%s.txt",genomebasedir,genome))
ascat.bc <- ascat.correctLogR(ascat.bc,
GCcontentfile = sprintf("%s/GC_G1000/GC_G1000_%s.txt", genomebasedir, genome),
replictimingfile = sprintf("%s/RT_G1000/RT_G1000_%s.txt", genomebasedir, genome)
)
ascat.plotRawData(ascat.bc, img.prefix = "After_correction_")
ascat.bc = ascat.aspcf(ascat.bc)
ascat.bc <- ascat.aspcf(ascat.bc)
ascat.plotSegmentedData(ascat.bc)
ascat.output = ascat.runAscat(ascat.bc, gamma=1, write_segments = T)
QC = ascat.metrics(ascat.bc,ascat.output)
write.table(QC,sprintf("%s.qc.txt",paste0(tumor_name,"_vs_",normal_name)))
save(ascat.bc, ascat.output, QC, file = sprintf('%s_vs_%s_ascat.Rdata',tumor_name,normal_name))
ascat.output <- ascat.runAscat(ascat.bc, gamma = 1, write_segments = T)
QC <- ascat.metrics(ascat.bc, ascat.output)
write.table(QC, sprintf("%s.qc.txt", paste0(tumor_name, "_vs_", normal_name)))
save(ascat.bc, ascat.output, QC, file = sprintf("%s_vs_%s_ascat.Rdata", tumor_name, normal_name))
116 changes: 57 additions & 59 deletions bin/assess_significance.R
Original file line number Diff line number Diff line change
@@ -1,59 +1,57 @@
#!/usr/bin/env Rscript

library(rtracklayer)

args <- commandArgs()

dataTable <-read.table(args[5], header=TRUE);
ratio<-data.frame(dataTable)

dataTable <- read.table(args[4], header=FALSE)
cnvs<- data.frame(dataTable)

ratio$Ratio[which(ratio$Ratio==-1)]=NA

cnvs.bed=GRanges(cnvs[,1],IRanges(cnvs[,2],cnvs[,3]))
ratio.bed=GRanges(ratio$Chromosome,IRanges(ratio$Start,ratio$Start),score=ratio$Ratio)

overlaps <- subsetByOverlaps(ratio.bed,cnvs.bed)
normals <- setdiff(ratio.bed,cnvs.bed)
normals <- subsetByOverlaps(ratio.bed,normals)

#mu <- mean(score(normals),na.rm=TRUE)
#sigma<- sd(score(normals),na.rm=TRUE)

#hist(score(normals),n=500,xlim=c(0,2))
#hist(log(score(normals)),n=500,xlim=c(-1,1))

#shapiro.test(score(normals)[which(!is.na(score(normals)))][5001:10000])
#qqnorm (score(normals)[which(!is.na(score(normals)))],ylim=(c(0,10)))
#qqline(score(normals)[which(!is.na(score(normals)))], col = 2)

#shapiro.test(log(score(normals))[which(!is.na(score(normals)))][5001:10000])
#qqnorm (log(score(normals))[which(!is.na(score(normals)))],ylim=(c(-6,10)))
#qqline(log(score(normals))[which(!is.na(score(normals)))], col = 2)

numberOfCol=length(cnvs)
wscore=c()
kscore=c()
for (i in c(1:length(cnvs[,1]))) {
values <- score(subsetByOverlaps(ratio.bed,cnvs.bed[i]))
resultw <- class(try(wilcox.test(values,score(normals)), silent = TRUE))
ifelse(resultw == "try-error", wscore <- c(wscore, "NA"), wscore <- c(wscore, wilcox.test(values,score(normals))$p.value))
resultks <- class(try(ks.test(values,score(normals)), silent = TRUE))
ifelse(resultks == "try-error",kscore <- c(kscore, "NA"),kscore <- c(kscore, ks.test(values,score(normals))$p.value))
}
cnvs = cbind(cnvs, as.numeric(wscore), as.numeric(kscore))

if (numberOfCol==7) {
names(cnvs)=c("chr","start","end","copy number","status","WilcoxonRankSumTestPvalue","KolmogorovSmirnovPvalue")
}
if (numberOfCol==9) {
names(cnvs)=c("chr","start","end","copy number","status","genotype","uncertainty","WilcoxonRankSumTestPvalue","KolmogorovSmirnovPvalue")
}
if (numberOfCol==11) {
names(cnvs)=c("chr","start","end","copy number","status","genotype","uncertainty","somatic/germline","precentageOfGermline","WilcoxonRankSumTestPvalue","KolmogorovSmirnovPvalue")
}
write.table(cnvs, file=paste(args[4],".p.value.txt",sep=""),sep="\t",quote=F,row.names=F)


#!/usr/bin/env Rscript

library(rtracklayer)

args <- commandArgs()

dataTable <- read.table(args[5], header = TRUE)
ratio <- data.frame(dataTable)

dataTable <- read.table(args[4], header = FALSE)
cnvs <- data.frame(dataTable)

ratio$Ratio[which(ratio$Ratio == -1)] <- NA

cnvs.bed <- GRanges(cnvs[, 1], IRanges(cnvs[, 2], cnvs[, 3]))
ratio.bed <- GRanges(ratio$Chromosome, IRanges(ratio$Start, ratio$Start), score = ratio$Ratio)

overlaps <- subsetByOverlaps(ratio.bed, cnvs.bed)
normals <- setdiff(ratio.bed, cnvs.bed)
normals <- subsetByOverlaps(ratio.bed, normals)

# mu <- mean(score(normals),na.rm=TRUE)
# sigma<- sd(score(normals),na.rm=TRUE)

# hist(score(normals),n=500,xlim=c(0,2))
# hist(log(score(normals)),n=500,xlim=c(-1,1))

# shapiro.test(score(normals)[which(!is.na(score(normals)))][5001:10000])
# qqnorm (score(normals)[which(!is.na(score(normals)))],ylim=(c(0,10)))
# qqline(score(normals)[which(!is.na(score(normals)))], col = 2)

# shapiro.test(log(score(normals))[which(!is.na(score(normals)))][5001:10000])
# qqnorm (log(score(normals))[which(!is.na(score(normals)))],ylim=(c(-6,10)))
# qqline(log(score(normals))[which(!is.na(score(normals)))], col = 2)

numberOfCol <- length(cnvs)
wscore <- c()
kscore <- c()
for (i in c(1:length(cnvs[, 1]))) {
values <- score(subsetByOverlaps(ratio.bed, cnvs.bed[i]))
resultw <- class(try(wilcox.test(values, score(normals)), silent = TRUE))
ifelse(resultw == "try-error", wscore <- c(wscore, "NA"), wscore <- c(wscore, wilcox.test(values, score(normals))$p.value))
resultks <- class(try(ks.test(values, score(normals)), silent = TRUE))
ifelse(resultks == "try-error", kscore <- c(kscore, "NA"), kscore <- c(kscore, ks.test(values, score(normals))$p.value))
}
cnvs <- cbind(cnvs, as.numeric(wscore), as.numeric(kscore))

if (numberOfCol == 7) {
names(cnvs) <- c("chr", "start", "end", "copy number", "status", "WilcoxonRankSumTestPvalue", "KolmogorovSmirnovPvalue")
}
if (numberOfCol == 9) {
names(cnvs) <- c("chr", "start", "end", "copy number", "status", "genotype", "uncertainty", "WilcoxonRankSumTestPvalue", "KolmogorovSmirnovPvalue")
}
if (numberOfCol == 11) {
names(cnvs) <- c("chr", "start", "end", "copy number", "status", "genotype", "uncertainty", "somatic/germline", "precentageOfGermline", "WilcoxonRankSumTestPvalue", "KolmogorovSmirnovPvalue")
}
write.table(cnvs, file = paste(args[4], ".p.value.txt", sep = ""), sep = "\t", quote = F, row.names = F)
Loading