forked from lbrusman/PanKbase-RNA-expression-PCA-Explorer
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenerate_PCA_results.R
More file actions
45 lines (35 loc) · 1.36 KB
/
generate_PCA_results.R
File metadata and controls
45 lines (35 loc) · 1.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
library(shiny)
library(dplyr)
library(tidyverse)
library(ggplot2)
library(Hmisc)
library(janitor)
library(Matrix)
## This file is to generate results for PCA visualization
## That way you don't need to run PCA every time (it's slow)
set.seed(123)
cell_types <- c("Acinar", "Active Stellate", "Alpha", "Beta", "Cycling Alpha", "Delta", "Ductal", "Endothelial", "Gamma + Epsilon", "Immune (Macrophages)", "MUC5B+ Ductal", "Quiescent Stellate")
for (c in cell_types) {
# Get pseudobulk data for this cell type
fname <- paste0("../outputs/CPM_matrices/", c, "_pseudobulk_cpm.csv")
pseudo_df <- read.csv(fname)
# Get all gene names
genes <- colnames(pseudo_df)[76:ncol(pseudo_df)]
pseudo_df[,genes] <- lapply(pseudo_df[,genes], as.numeric) # Make sure columns are numeric
# Get gene expr. matrix to do PCA
to_plot <- pseudo_df[,genes]
# Do PCA
res_pca <- prcomp(to_plot, scale = TRUE)
# Save RDS for factor contributions
setwd("../outputs/PCA_results/")
fname <- paste0(c, "_all_PCA_results.rds")
saveRDS(res_pca, fname)
# Merge PCA results with metadata
res <- res_pca$x %>% as.data.frame()
res$samples <- pseudo_df$samples
res$treatments <- pseudo_df$treatments
res <- res %>% merge(pseudo_df[,1:75], on = c("samples", "treatments"))
# Save PC results merged with metadata
fname <- paste0(c, "_PCA_results.csv")
write.csv(res, fname)
}