-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path1.2_plot_qupath_measurements_KLF4_ESR2.R
More file actions
170 lines (135 loc) · 8.01 KB
/
1.2_plot_qupath_measurements_KLF4_ESR2.R
File metadata and controls
170 lines (135 loc) · 8.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# title: "Downstream QC and plotting on the single-cell fluorescence quantification of RNAscope images - KLF4, ESR2"
# input data: "RNAscope_measurements_Fig5_EDFig6.tsv"
# author: "Wangjie Liu"
## Non-confocal image data of KLF4, ESR2 (rep2 - 20240918)
## best Z
library(readr)
library(dplyr)
library(tidyverse)
library(ggplot2)
library(stringr)
library(gridExtra)
setwd("./")
measurements <- read_delim("data/RNAscope_measurements_Fig5_EDFig6.tsv", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
rownames(measurements) <- measurements$`Object ID`
ggplot(measurements)+geom_density(mapping = aes(x =`Nucleus: Area µm^2`))+geom_vline(xintercept = 100)
# need to decide empirically based on the data
ggplot(measurements)+geom_density(mapping = aes(x =`Channel_1: Nucleus: Mean`)) + geom_vline(xintercept = 800)# DAPI signal
ggplot(measurements)+geom_density(mapping = aes(x =`Channel_1: Nucleus: Median`)) # DAPI signal
ggplot(measurements)+geom_density(mapping = aes(x =`Channel_1: Cytoplasm: Mean`)) # DAPI signal
ggplot(measurements)+geom_density(mapping = aes(x = log10(`Channel_1: Cell: IntDen Corrected`))) # DAPI signal
ggplot(measurements)+geom_density(mapping = aes(x = `Nucleus/Cell area ratio`)) # high Nucleus/Cell area ratio as an indicative of cell clumps or multinucleated cells
# filter out outlier with very small cell size
measurements <- measurements[measurements$`Nucleus: Area µm^2` >= 100 ,] # size
# filter out outlier with weak DAPI signals (wrong cell segmentation)
## for best Z
measurements <- measurements[measurements$`Channel_1: Nucleus: Mean` >= 800,] # DAPI signals
# exclude wrong segmentation or cells without intact nuclei indicated by low sum intensity of DAPI
## for best Z
measurements <- measurements[log10(measurements$`Channel_1: Cell: IntDen Corrected`) >= 6.5, ] # sum of background corrected DAPI signals
# filter out cells that in cell clumps indicated by high Nucleus/Cell area ratio & filter out artifacts that come from dirt indicated by high median DAPI signals in cytoplasm
measurements <- measurements[measurements$`Nucleus/Cell area ratio` <= 0.75 & measurements$`Channel_1: Cytoplasm: Mean` <= 1000, ]
measurements$analysis <- "Fluorescence"
measurements$analysis[grep("DPC", measurements$Image)] <- "DPC"
table(measurements$analysis)
# DPC Fluorescence
# 6417 6841
measurements2 <- measurements[measurements$analysis != "DPC",]
measurements2 <- measurements2[, c(1, 2, grep("Cell: Mean",colnames(measurements2)), grep("Cell: IntDen Corrected",colnames(measurements2)), grep("Cell: ROI Area",colnames(measurements2)))]
measurements2$Well <- substring(measurements2$Image, 33, 39)
table(measurements2$Well)
# R02-C02 R02-C03 R02-C04 R02-C05 R03-C02 R03-C03 R03-C04 R03-C05 R04-C02 R04-C03 R04-C04 R05-C02 R05-C03 R05-C04 R06-C02 R06-C03 R06-C04
# 312 419 410 356 454 431 419 385 323 489 512 418 356 481 346 415 315
measurements2$condition <- "Pos_neg_controls"
measurements2$condition[measurements2$Well %in% c(paste0("R02-C0",2:4))] <- "Ch2=Net1; Ch3=WPRE; Ch4=Esr2-ORF"
measurements2$condition[measurements2$Well %in% c(paste0("R03-C0",2:4))] <- "Ch2=Net1; Ch3=WPRE; Ch4=Cdsn"
measurements2$condition[measurements2$Well %in% c(paste0("R04-C0",2:4))] <- "Ch2=Gng12; Ch3=WPRE; Ch4=Aspn"
measurements2$condition[measurements2$Well %in% c(paste0("R05-C0",2:4))] <- "Ch2=Postn; Ch3=WPRE; Ch4=Glul"
measurements2$condition[measurements2$Well %in% c(paste0("R06-C0",2:4))] <- "Ch2=Postn; Ch3=WPRE; Ch4=Glul"
table(measurements2$condition)
measurements2$cell_group <- "WT-C3H10 cells"
measurements2$cell_group[measurements2$Well %in% c(paste0("R0",2:4,"-C02"))] <- "Esr2 cells"
measurements2$cell_group[measurements2$Well %in% c(paste0("R0",5:6,"-C02"))] <- "Klf4 cells"
measurements2$cell_group[measurements2$Well %in% c(paste0("R0",2:6,"-C03"))] <- "mCherry-noDox cells"
measurements2$cell_group[measurements2$Well %in% c(paste0("R0",2:6,"-C04"))] <- "WT-C3H10 cells"
table(measurements2$cell_group)
condition.ois <- unique(measurements2$condition)
condition.ois <- condition.ois[condition.ois != "Pos_neg_controls"]
lapply(condition.ois, function(x){
condition.oi <- x
print(condition.oi)
plot_data <- measurements2[measurements2$condition == condition.oi,]
intensity.oi <- "Mean BgCorrected"
colnames(plot_data) <- sub(paste0(": Cell: ", intensity.oi),"",colnames(plot_data))
colnames(plot_data)
# Channel_0 = DPC
# Channel_1 = DAPI
# Channel_2 = Vivid520
# Channel_3 = Vivid570
# Channel_4 = Vivid650
# Channel_5 = BF
plot_data <- as.data.frame(plot_data)
# for each well and each channel, remove 1% of the upper intensity (extreme values/outliers)
cells.out <- c()
for (Ch.oi in paste0("Channel_", 2:4)){
for (Well.oi in unique(plot_data$Well)){
df <- plot_data[plot_data$Well == Well.oi, ]
cells.out <- c(cells.out, df$`Object ID`[df[, Ch.oi] >= quantile(df[, Ch.oi], 0.99)], df$`Object ID`[df[, Ch.oi] <= quantile(df[, Ch.oi], 0.01)])
}
}
plot_data <- plot_data[!plot_data$`Object ID` %in% cells.out, ]
## contamination correction (before log transformation)
model_Ch2 <- lm(data = plot_data[plot_data$cell_group %in% c("mCherry-noDox cells"),], formula = Channel_2 ~ Channel_3)
model_Ch2
model_Ch4 <- lm(data = plot_data[plot_data$cell_group %in% c("mCherry-noDox cells"),], formula = Channel_4 ~ Channel_3)
model_Ch4
if(model_Ch2$coefficients[2] > 0.01){
plot_data$Channel_2 <- plot_data$Channel_2 - predict(model_Ch2, newdata = plot_data)
p1 <- ggplot() +
geom_point(plot_data, mapping = aes(x=(`Channel_3`), y=(`Channel_2`), col=cell_group), alpha = 0.5,) +
facet_grid(~cell_group)+
geom_smooth(plot_data[plot_data$cell_group != "WT-C3H10 cells",], mapping = aes(x=(`Channel_3`), y=(`Channel_2`)))+
facet_grid(~cell_group)+
labs(title = plot_data$condition)
}
if(model_Ch4$coefficients[2] > 0.01){
plot_data$Channel_4 <- plot_data$Channel_4 - predict(model_Ch4, newdata = plot_data)
p2 <- ggplot() +
geom_point(plot_data, mapping = aes(x=(`Channel_3`), y=(`Channel_4`), color=cell_group), alpha = 0.5) +
facet_grid(~cell_group)+
geom_smooth(plot_data[plot_data$cell_group != "WT-C3H10 cells",], mapping = aes(x=(`Channel_3`), y=(`Channel_4`)))+
facet_grid(~cell_group)
}
grid.arrange(p1,p2)
## log transformation after contamination correction
plot_data.new <- plot_data
base.oi <- 10
plot_data.new[plot_data.new < base.oi] <- base.oi
for (i in paste0("Channel_", 2:4)){
plot_data.new[, i] <- log(plot_data.new[, i], base = base.oi)
}
# if plot WT and TF cells together
plot_data.sub <- plot_data.new[plot_data.new$cell_group != "mCherry-noDox cells",]
plot_data.sub$cell_group |> table() |> print()
cell_group <- names(table(plot_data.sub$cell_group))
cell_count <- table(plot_data.sub$cell_group)
p1 <- ggplot() +
geom_point(plot_data.sub, mapping = aes(x=(`Channel_3`), y=(`Channel_2`), col=cell_group), alpha = 0.5) +
geom_smooth(plot_data.sub, mapping = aes(x=(`Channel_3`), y=(`Channel_2`)))+
labs(title = plot_data.sub$condition, subtitle = paste0(cell_group[1], "=", cell_count[1], "cells;", cell_group[2], "=", cell_count[2]))+
theme_cowplot()+
scale_x_continuous(breaks = seq(1, 4, by = 0.5), labels = c(1, 1.5, 2, 2.5, 3, 3.5, 4))+
scale_y_continuous(breaks = seq(1, 4, by = 0.5), labels = c(1, 1.5, 2, 2.5, 3, 3.5, 4))
p2 <- ggplot() +
geom_point(plot_data.sub, mapping = aes(x=(`Channel_3`), y=(`Channel_4`), col=cell_group), alpha = 0.5) +
geom_smooth(plot_data.sub, mapping = aes(x=(`Channel_3`), y=(`Channel_4`)))+
theme_cowplot()+
scale_x_continuous(breaks = seq(1, 4, by = 0.5), labels = c(1, 1.5, 2, 2.5, 3, 3.5, 4))+
scale_y_continuous(breaks = seq(1, 4, by = 0.5), labels = c(1, 1.5, 2, 2.5, 3, 3.5, 4))
grid.arrange(p1,p2)
p <- grid.arrange(p1, p2)
p
Output_name <- gsub(";","_",condition.oi)
Output_name <- gsub("=","-", Output_name)
ggsave(p, filename = paste0("figures/",Output_name, ".pdf"), height = 8, width = 6)
})