-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path7_Plotting.R
More file actions
161 lines (130 loc) · 5.61 KB
/
7_Plotting.R
File metadata and controls
161 lines (130 loc) · 5.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
##########################################################
### Author: Sarmiento Cabello, Sonia ###
### Version: 1.0. ###
### Objective: Plotting ouput of Switch tool per ###
### sample and per variant. ###
##########################################################
###########################
### Plotting per sample ###
###########################
#Plotting the Switch Error Rate for all phasing types you are comparing per sample
# Load libraries
library(ggplot2)
library(tidyverse)
# Construct a default theme for plotting:
default.theme <- theme_bw() +
theme(plot.title = element_text(size = 8, colour = "black", face = "bold", hjust = 0),
axis.title = element_text(size = 8, colour = "black", face = "bold"),
axis.text.x = element_text(size = 8, colour = "black"),
axis.text.y = element_text(size = 8, colour = "black"),
axis.line = element_line(size = 0.25, colour = "black"),
legend.title = element_text(size = 8, colour = "black"),
legend.text = element_text(size = 7, colour = "black"),
legend.key.size = unit(0.3, "cm"),
strip.background = element_rect(fill = "white"),
strip.text = element_text(size = 8, colour = "black", face = "bold"))
# Set Directory
# CHANGE ACCORDING TO YOUR DIRECTORY #
setwd('/Users/soniasarmiento/Documents/Phasing')
# Load switch outputs, one per phasing type (e.g. read-base, pedigree (trio + multiple trio))
# CHANGE ACCORDING TO YOUR FILES #
phasing1 <- read.delim('outputexample.sample.switch.txt.gz', sep='', header=F) #e.g. load readbase
phasing2 <- read.delim('outputexample.sample.switch.txt.gz', sep='', header=F) #e.g. load single trio
phasing3 <- read.delim('outputexample.sample.switch.txt.gz', sep='', header=F) #e.g. load multiple trio
#Add descriptive columns to the database
# Phasing 1 #
phasing1$type <- 'phasing1'
phasing1$scaffold <- 'scaffold 14'
colnames(phasing1)[1]<- 'indv'
colnames(phasing1)[4]<- 'SER'
# Phasing 2 #
phasing2$type <- 'phasing2'
phasing2$scaffold <- 'scaffold 14'
colnames(phasing2)[1]<- 'indv'
colnames(phasing2)[4]<- 'SER'
# Phasing 3 #
phasing3$type <- 'phasing3'
phasing3$scaffold <- 'scaffold 14'
colnames(phasing3)[1]<- 'indv'
colnames(phasing3)[4]<- 'SER'
# Bind the datasets
df <- rbind(phasing1, phasing2, phasing3)
df <- df %>% group_by(type)
# get the mean per phasing type
summary_df <- df %>% group_by(type) %>% summarize(m=mean(SER))
# PLOT SER per sample #
jpeg(paste0('Results_Sample_Switch',win,'.jpg'), width=12, height=6, unit='in',quality = 1000,res=800, antialias = "cleartype")
ggplot(df) +
geom_line(aes(x=scaffold, y=SER, color=type), size = 0.5) +
xlab("Scaffold") + ylab("Switch error rate (%)") +
default.theme +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, size=5)) +
theme(legend.position="top") +
scale_color_manual(values=c("#00AFBB", "#E7B800", '#FF4500')) + # as many color as phasing types
geom_text(data=summary_df,
aes(x=scaffold, y=m, label=round(m,2), group=type, color=type),
position = position_dodge(width = 1.3),
vjust = -6.5,
size = 3,
parse = TRUE)
dev.off()
############################
### Plotting per variant ###
############################
#Plotting the Switch Error Rate of one phasing type along the chromosome
# Load libraries #
library(SNPRelate)
library(stats)
library(tidyverse)
library(ggplot2)
library(RColorBrewer)
# Load Data
df <- read.delim('pedigreephased.variant.switch.txt', header=F, sep='')
colnames(df) <- c('rm1', 'pos', 'rm2', 'rm3', 'trioSER')
df$trioSER <- ifelse(df$trioSER=='NaN', NA, df$trioSER)
# Remove rows with NAs for both readbase and trio
df_complete <- na.omit(df)
df_complete <- df_complete %>% select(pos, trioSER)
#for several columns:
#sel <- apply( df[,c("readbaseSER","trioSER")], 1, function(x) all(is.na(x)) )
#df_complete <- df[!sel,]
# Once we have the complete df without NAs, reorder it by ascending position of chr.
df_complete$pos <- as.numeric(df_complete$pos)
df_complete <- df_complete %>% arrange(pos)
# We have 331,006 SNPs, so we can't plot per SNP and have to plot per window
# make a dataframe with windows in SNP indexes
win <- 1000
round.down <- nrow(df_complete) - (nrow(df_complete) %% win) # rounds down to previous 100th
change <- (nrow(df_complete) - (ncol(df_complete) %% win)) / win
# makes intervals of 100 SNPs
int <- data.frame('start'=seq(1,round.down,win),
'end'=seq(1,round.down,win)+win-1)
# Define Var matrix
var <- as.data.frame(matrix(NA,nrow=nrow(int), ncol=1))
pos <- c()
#Transform df_complete to a matrix
t <- df_complete %>% select(trioSER)
dat.t <- t(t)
# Sum
for(i in 1:nrow(int)){
# Sum number of mismatches
var[i,] <- sum(dat.t[,seq(int$start[i],int$end[i])]) / win
# Extract mean position of SNPs in the window
pos[i] <- mean(df_complete$pos[int$start[i]:int$end[i]])
}
# Plot mismatches in snps windows:
#Function for transparency
add.alpha <- function(col, alpha=1){
if(missing(col))
stop("Please provide a vector of colours.")
apply(sapply(col, col2rgb)/255, 2,
function(x)
rgb(x[1], x[2], x[3], alpha=alpha))
}
# Plot
jpeg(paste0('Single trio phasing vs Mendelian Inheritance along the Scaffold.jpg'), width=12,height=6,unit='in',quality = 1000,res=800)
par(mfrow=c(1,1))
plot(0,pch='',xlab="1000 snps window index (ss14)",ylab='switch error rate', xlim=c(0,max(df_complete$pos)),
ylim=c(0,10), main = "Comparison of single trio pedigree phasing to Mendelian Inheritance along ss14")
lines(var[,1]~pos, col=add.alpha('black',.7)) # plot readbase
dev.off()