Skip to content

Latest commit

 

History

History
294 lines (234 loc) · 9.2 KB

File metadata and controls

294 lines (234 loc) · 9.2 KB

004_correlation_platform_shared.R

xrydbh 2022-02-04

library(tidyverse)
library(DescTools)
## 
## Attaching package: 'DescTools'

## The following object is masked from 'package:car':
## 
##     Recode

## The following object is masked from 'package:sjmisc':
## 
##     %nin%
library(writexl)
library(ggvenn)
## Loading required package: grid
library(VennDiagram) 
## Loading required package: futile.logger

## 
## Attaching package: 'VennDiagram'

## The following object is masked from 'package:car':
## 
##     ellipse
# Set path
# If script is running separately move up one step with setwd(".."). Only needed once (for first script run separately).



corr_method="spearman"
p.adjust.methods=c("none") #fdr,none,BH

# Get abundances
loaded_abundancies <- load("./RData/003_ab_list.RData")
loaded_abundancies 
## [1] "ab_list"
ms_key <- ab_list[["dat_meta"]][["ms_meta"]] %>% 
  dplyr::select(UniprotID,Gene.symbol)

pea_key <- ab_list[["dat_meta"]][["pea_meta"]] %>% 
  dplyr::select(UniprotID,Gene.symbol)

# sampName_key <- load("./RData/001_alt_samp_names.RData")
# #MS_column_head.0.1$Plf_unique_no
# #PEA_column_head.0.1

PEA_column_head.0.1 <- readxl::read_excel("./data/PEA_column_head.xlsx") 
MS_column_head.0.1 <- readxl::read_excel("./data/MS_column_head.xlsx") 




# get ms abundances
ms_ab <- ab_list %>% 
  pluck("dat_abundances") %>% 
  pluck("ms_abundances_eq_med_norm") %>%  #"ms_abundances"
  dplyr::select(-Gene.symbol)
dim(ms_ab)
## [1] 745  17
# Get pea abundances
pea_ab.0.0 <- ab_list %>%
  pluck("dat_abundances") %>%
  pluck("pea_abundances_eq_med_norm") %>%
  dplyr::select(-c(Panel))

# dim(pea_ab.0.0 )

# Get pea abundances; Need to filter on number of NA ; For shared we dont want any missing values; cannot use na.omit one PEA data since we are keeping themissing values
pea_ab_filt <- ab_list %>% 
  pluck("dat_meta") %>% 
  pluck("pea_meta") %>% 
  filter(Numb_NA_High<=0&Numb_NA_Low<=0) %>% 
  dplyr::select(names(pea_ab.0.0))
  
dim(pea_ab_filt)
## [1] 392  18
pea_ab <- pea_ab.0.0 %>% 
  filter(UniprotID%in%pea_ab_filt$UniprotID) %>% 
  dplyr::select(-Gene.symbol)

dim(pea_ab)
## [1] 392  17
# ##########################
# # Make Venn diagram
# ##########################
# pea_meta <- pea_ab.0.0$UniprotID
# ms_meta <- ms_ab$UniprotID
# l.pea <-pea_ab.0.0$UniprotID %>% length()
# l.ms <- ms_ab$UniprotID %>% length()
# 
# venn_list <- list(MS=ms_ab$UniprotID,PEA=pea_ab$UniprotID)
# 
# 
# # pdf(file = paste("./out_r/019_pea_overlap_wgcna_ttest.pdf",sep=""))
# # ggvenn(
# #   venn_list , 
# #   fill_color = c("#0073C2FF", "#EFC000FF"), #, "#CD534CFF",, "#868686FF"
# #   stroke_size = 0.5, set_name_size = 4
# # )
# # dev.off()
# 
# venn_text=paste("Overlap of ",
#                 l.ms,
#                 " ms and \n",
#                 l.pea," Olink pea proteins",
#                 sep="")
# 
# 
# venn.diagram(venn_list, main=venn_text,fill = c("darkgreen", "darkgrey"), 
#              main.cex=3, 
#              alpha = c(0.7, 0.7), 
#              lwd =1, filename="./out_r/004_venn_overlap_ms_pea_unip.tiff",
#              cex=4,label.col=c("grey","black","darkolivegreen4"),
#              cat.cex=3,cat.dist=-0.05,cat.col=c("chartreuse4","lightgrey"))#
# 
# # Gene symbol
# ms_ab <- ab_list %>% 
#   pluck("dat_abundances") %>% 
#   pluck("ms_abundances_eq_med_norm") %>%  #"ms_abundances"
#   dplyr::select(-UniprotID)
# pea_ab <- pea_ab.0.0 %>% 
#   filter(UniprotID%in%pea_ab_filt$UniprotID) %>% 
#   dplyr::select(-UniprotID)
# venn_list <- list(MS=ms_ab$Gene.symbol,PEA=pea_ab$Gene.symbol)
# venn.diagram(main=venn_text,venn_list, fill = c("darkgreen", "darkgrey"), 
#              main.cex=3, 
#              alpha = c(0.7, 0.7), 
#              lwd =1, filename="./out_r/004_venn_overlap_ms_pea_gs.tiff",
#              cex=4,label.col=c("grey","black","darkolivegreen4"),
#              cat.cex=3,cat.dist=-0.05,cat.col=c("chartreuse4","lightgrey"))#
# 
# # end Venn


# Give platform specific sample names
names(ms_ab) <- c(names(ms_ab)[1],(MS_column_head.0.1$Plf_unique_no))
names(pea_ab) <- c(names(pea_ab)[1],(PEA_column_head.0.1$Plf_unique_no))

# Inner join ms and pea to get shared
shared.0.0 <- ms_ab %>% 
  inner_join(pea_ab,by="UniprotID") %>% 
  na.omit()

dim(shared.0.0)
## [1] 78 33
# Gather all abundance values into one column
shared.long <- shared.0.0 %>% 
  pivot_longer(cols=-1) %>% 
  mutate_at(vars(c("value")),as.numeric)

# gather abundance values into MS and PEA columns respectively
shared.semilong <- shared.0.0 %>% pivot_longer(cols=-1,
                                               names_to = c(".value","samp"),
                                               names_sep = "_") %>% 
  mutate_at(vars(c("MS","PEA")),as.numeric)

# Make numeric and perform correlation test
shared.0.2 <- shared.semilong %>% 
  group_by_at("UniprotID") %>% 
  mutate_at(vars("MS","PEA"), list(as.numeric)) %>%
  summarise(Corr = cor.test(MS,PEA)$estimate,
            p = cor.test(MS,PEA, method=corr_method)$p.value,
            p_adjust= p.adjust(p, method = p.adjust.methods, n = length("UniprotID")),#dim(shared.0.2)[1]
            log_p_adjust= log2(p_adjust),
            logp = log2(p),
            Sig = p < 0.05,
            Sig_adjust = p_adjust < 0.05,
            CI_low = SpearmanRho(x=MS, y = PEA, conf.level=0.95)[2], #cor.test(MS,PEA)$conf.int[1]
            CI_high = SpearmanRho(x=MS, y = PEA, conf.level=0.95)[3])

numb_shared_prots <- length(shared.0.2$UniprotID)

# Annotate results with Gene.symbol and make Unipor-Gene.symbol united name
shared.0.3 <- inner_join(shared.0.2,ms_key,by=c("UniprotID"="UniprotID")) %>% #with ms gene.symb
  relocate(Gene.symbol,.after=UniprotID) %>% 
  dplyr::rename(Gene.symbol.ms=Gene.symbol) %>% # rename to Gene.symbol
  inner_join(pea_key,by=c("UniprotID"="UniprotID")) %>% # with pea Gene.symbol
  relocate(Gene.symbol,.after=Gene.symbol.ms) %>% 
  dplyr::rename(Gene.symbol.pea=Gene.symbol) %>%  
  # filter(Gene.symbol.pea!="FAM3C") %>% # What is going on with FAM3C ?
  # filter(Gene.symbol.ms!="FAM3C") %>% 
  unite(UnitedIDs,Gene.symbol.ms:UniprotID,sep="_")


# Replace -inf vals 
max_val<- shared.0.3$logp[!is.infinite(shared.0.3$logp)] %>% max()

min_val <- shared.0.3$logp[!is.infinite(shared.0.3$logp)] %>% min()

shared.0.3$logp[is.infinite(shared.0.3$logp)] <- min_val
shared.0.3$log_p_adjust[is.infinite(shared.0.3$log_p_adjust)] <- min_val


title_plot_anders <- paste("Plot of ",corr_method," correlation for ",numb_shared_prots," shared genes, no missing values","\nAdjust method: ",p.adjust.methods,sep="")

shared_prots_eq_med_norm_ranked_correlation <- shared.0.3 %>% ggplot(aes(x=reorder(UnitedIDs, Corr), # !!as.symbol(protID)
                          y=Corr, color=-log_p_adjust)) + #logp,log_p_adjust
  geom_hline(yintercept=0, linetype="dashed") + 
  geom_point(aes(shape=Sig_adjust), size=5) + #Sig,Sig_adjust
  labs(colour = "p") + #-logp
  theme_classic() + 
  theme(axis.text.x = element_text(angle=90, hjust=1, vjust=0.5)) + 
  geom_linerange(aes(ymax=CI_low, ymin=CI_high)) + 
  coord_flip() + 
  xlab("Proteins measured by both MS and PEA") +
  ggtitle(title_plot_anders)

shared_prots_eq_med_norm_ranked_correlation

# remove
# remove old files
file_to_remove <- paste("004_shared_prots_eq_med_norm_ranked_correlation_",corr_method,"_corr_",sep="")
junk.0 <- dir(path="./out_r/",  pattern=file_to_remove) # ?dir
junk <- paste("./out_r/",junk.0,sep="")
file.remove(junk) # ?file.remove
## [1] TRUE
# Anders plot 
pdf(file = paste("./out_r/004_shared_prots_eq_med_norm_ranked_correlation_",corr_method,"_corr_",numb_shared_prots ,"_adjust_method_",p.adjust.methods,".pdf",sep=""), #_samp10_rem The directory you want to save the file in
    width = 8, # The width of the plot in inches
    height = 12) # The height of the plot in inches
print(shared_prots_eq_med_norm_ranked_correlation)

dev.off()
## quartz_off_screen 
##                 2
write_xlsx(shared.0.3,paste("./out_r/004_corr_shared.xlsx",sep=""))

# shared_old <- c("O00533","O43405","O75326","O95445","P00441","P00915","P01034","P01130","P02452","P02760","P02786","P03951","P04275","P04792","P05121","P05154","P05164","P05362","P05543","P05556","P06681","P07237","P07339","P07359","P08253","P0DOY2","P10586","P10721","P11226","P11717","P12830","P13591","P14151","P14543","P14780","P15144","P17813","P17936","P18065","P19320","P20023","P22105","P23284","P24592","P24821","P27487","P30086","P33151","P35443","P35542","P36222","P49747","P52888","P98160","Q02487","Q12860","Q13332","Q13867","Q15113","Q15166","Q15485","Q15582","Q15828","Q16698","Q16853","Q4KMG0","Q6EMK4","Q86VB7","Q8IW75","Q8NBP7","Q92520","Q99497","Q99969","Q99972","Q9UM47","Q9Y5Y7","Q9Y624","Q9Y6N7")
# 
# shared_new <- shared.0.2$UniprotID %>% unique
# 
# # Not in old: "Q12884" "Q9HCB6" "Q9NQ79"
# shared_new[!(shared_new%in%shared_old)]
# # Not in new:"P00441" "P07339" Not in raw data file
# shared_old[!(shared_old%in%shared_new)]
# 
# # P07339 in Olink input
# CTSD
# OID00622
# 
# # P00441
# SOD1
# OID01222