TF-seq/code/08_RNAscope_quantification/1.1_plot_qupath_measurements_mCherry.R at main · DeplanckeLab/TF-seq · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
# title: "Downstream QC and plotting on the single-cell fluorescence quantification of RNAscope images - mCherry"
# input data: "RNAscope_measurements_Fig1_EDFig1.tsv"
# author: "Wangjie Liu"


# Non-confocoal image data of mCherry (rep - 20241021)
# Best Z

library(readr)
library(dplyr)
library(tidyverse)
library(ggplot2)
library(stringr)
library(gridExtra)

setwd("./")

measurements <- read_delim("data/RNAscope_measurements_Fig1_EDFig1.tsv", delim = "\t", escape_double = FALSE, trim_ws = TRUE) %>% as.data.frame()
rownames(measurements) <- measurements$`Object ID`

measurements$analysis <- "Fluorescence"
measurements$analysis[grep("DPC", measurements$Image)] <- "DPC"
table(measurements$analysis)
# DPC Fluorescence
# 2569         2412

df <- measurements[measurements$analysis == "Fluorescence",]

ggplot(df)+geom_density(mapping = aes(x =`Nucleus: Area µm^2`))+geom_vline(xintercept = 600)
# need to decide empirically based on the data
ggplot(df)+geom_density(mapping = aes(x =`Channel_1: Nucleus: Mean`)) + geom_vline(xintercept = 2000)# DAPI signal
ggplot(df)+geom_density(mapping = aes(x =`Channel_1: Nucleus: Median`)) # DAPI signal
## this batch have some cells  have scattered and weak DAPI signal, exclude by adding cutoff for `Channel_1: Nucleus: Max`
ggplot(df)+geom_density(mapping = aes(x =`Channel_1: Nucleus: Max`)) + geom_vline(xintercept = 12000) # DAPI signal
ggplot(df)+geom_density(mapping = aes(x =`Channel_1: Cytoplasm: Mean`)) # DAPI signal
ggplot(df)+geom_density(mapping = aes(x = log10(`Channel_1: Cell: IntDen Corrected`)))  #  DAPI signal
ggplot(df)+geom_density(mapping = aes(x = `Nucleus/Cell area ratio`)) # high Nucleus/Cell area ratio is a indicative of cell clumps or multinucleated cells


# filter out outlier with very small cell size
measurements <- measurements[measurements$`Nucleus: Area µm^2` >= 600 ,] # size

# filter out outlier with weak DAPI signals (wrong cell segmentation)
measurements <- measurements[measurements$`Channel_1: Nucleus: Mean` >= 2000 & measurements$`Channel_1: Nucleus: Max` >=12000,] # DAPI signals

# filter out outlier with very strong DAPI signals (wrong cell segmentation)
measurements <- measurements[measurements$`Channel_1: Nucleus: Mean` <= 15000 & measurements$`Channel_1: Nucleus: Max` <=45000,] # DAPI signals

# exclude cells without intact nuclei indicated by low sum intensity of DAPI
measurements <- measurements[measurements$`Channel_1: Cell: IntDen Corrected` >= 10^6.5, ] # sum of background corrected DAPI signals

measurements <- measurements[measurements$`Nucleus/Cell area ratio` >= 0.3 & measurements$`Nucleus/Cell area ratio` <= 0.75 & measurements$`Channel_1: Cytoplasm: Mean` <= 4000, ]

table(measurements$analysis)


# subset to Cell Mean Intensity; Cell Mean intensity with background corrected; Cell integrated intensity with background corrected
measurements2 <- measurements[measurements$analysis != "DPC",]
measurements2 <- measurements2[, c(1, 2, grep("Cell: Mean",colnames(measurements2)), grep("Cell: IntDen Corrected",colnames(measurements2)),  grep("Cell: ROI Area",colnames(measurements2)))]
measurements2$Well <- substring(measurements2$Image, 29, 35)
table(measurements2$Well)
# R02-C02 R02-C03 R02-C04 R02-C05 R03-C02 R03-C03 R03-C04 R03-C05 R04-C02 R04-C03 R04-C04 R04-C05
# 275     211     122     168     223     270     111     184     207     379     137     125


measurements2$condition <- "Pos_neg_controls"
measurements2$condition[measurements2$Well %in% c(paste0("R02-C0",2:4))] <- "Ch2=mCherry; Ch3=WPRE"
measurements2$condition[measurements2$Well %in% c(paste0("R03-C0",2:4))] <- "Ch2=mCherry; Ch3=WPRE"
measurements2$condition[measurements2$Well %in% c(paste0("R04-C0",2:4))] <- "Ch2=mCherry; Ch3=WPRE"

table(measurements2$condition)
# Ch2=mCherry; Ch3=WPRE      Pos_neg_controls
# 1935                   477

# replicates:
measurements2$replicate <- "Others"
measurements2$replicate[measurements2$Well %in% c(paste0("R02-C0",2:4))] <- "Rep1"
measurements2$replicate[measurements2$Well %in% c(paste0("R03-C0",2:4))] <- "Rep2"
measurements2$replicate[measurements2$Well %in% c(paste0("R04-C0",2:4))] <- "Rep3"
table(measurements2$replicate)
# Others   Rep1   Rep2   Rep3
# 477    608    604    723

measurements2$cell_group <- "WT-C3H10 cells"
measurements2$cell_group[measurements2$Well %in% c(paste0("R0",2:4,"-C02"))] <- "mCherry cells"
measurements2$cell_group[measurements2$Well %in% c(paste0("R0",2:4,"-C03"))] <- "mCherry-noDox cells"
measurements2$cell_group[measurements2$Well %in% c(paste0("R0",2:4,"-C04"))] <- "WT-C3H10 cells"
table(measurements2$cell_group)
# mCherry-noDox cells       mCherry cells      WT-C3H10 cells
# 860                 705                 847

measurements2$cell_group <- factor(measurements2$cell_group, levels = c("mCherry cells", "mCherry-noDox cells", "WT-C3H10 cells"))


condition.oi <- "Ch2=mCherry; Ch3=WPRE"
plot_data <- measurements2[measurements2$condition == condition.oi,]
intensity.oi <- "Mean BgCorrected"
colnames(plot_data) <- sub(paste0(": Cell: ", intensity.oi),"",colnames(plot_data))
colnames(plot_data)

# Channel_0 = DPC
# Channel_1 = DAPI
# Channel_2 = mCherry
# Channel_3 = Vivid650
# Channel_4 = BF


plot_data <- as.data.frame(plot_data)


#  plot_data <- plot_data[plot_data$replicate == "Rep1",]
  # for the first look
  ggplot() +
    geom_point(plot_data, mapping = aes(x=(`Channel_3`), y=(`Channel_2`), col=replicate), alpha = 0.5,) +
    facet_grid(~cell_group)+
    geom_smooth(plot_data[plot_data$cell_group != "WT-C3H10 cells",], mapping = aes(x=(`Channel_3`), y=(`Channel_2`)))+
    facet_grid(~cell_group)+
    labs(title = plot_data$condition)


  # for each well and each channel, remove 1% of the upper/lower intensity (extreme values/outliers)
  cells.out <- c()
  for (Ch.oi in paste0("Channel_", 2:3)){
    for (Well.oi in unique(plot_data$Well)){
      df <- plot_data[plot_data$Well == Well.oi, ]
      cells.out <- c(cells.out, df$`Object ID`[df[, Ch.oi] >= quantile(df[, Ch.oi], 0.99)], df$`Object ID`[df[, Ch.oi] <= quantile(df[, Ch.oi], 0.01)])
    }
  }

  plot_data <- plot_data[!plot_data$`Object ID` %in% cells.out, ]


  ggplot() +
    geom_point(plot_data, mapping = aes(x=(`Channel_3`), y=(`Channel_2`), col=replicate), alpha = 0.5,) +
    facet_grid(~cell_group)+
    geom_smooth(plot_data[plot_data$cell_group != "WT-C3H10 cells",], mapping = aes(x=(`Channel_3`), y=(`Channel_2`)))+
    facet_grid(~cell_group)+
    labs(title = plot_data$condition)

  ## since there is intercorrelation between mCherry and WPRE even in mCherry-noDox cells because of mCherry leaky expression,
  ## mCherry-noDox cannot be used for contamination correction, instead, use WT-C3H10 cells to correct spillover

  model_Ch2 <- lm(data = plot_data[plot_data$cell_group %in% c("WT-C3H10 cells"),], formula = Channel_2 ~ Channel_3)
  model_Ch2
  if(model_Ch2$coefficients[2] > 0.01){
    plot_data$Channel_2 <- plot_data$Channel_2 - predict(model_Ch2, newdata = plot_data)
    p1 <- ggplot() +
      geom_point(plot_data, mapping = aes(x=(`Channel_3`), y=(`Channel_2`), col=replicate), alpha = 0.5,) +
      facet_grid(~cell_group)+
      geom_smooth(plot_data[plot_data$cell_group != "WT-C3H10 cells",], mapping = aes(x=(`Channel_3`), y=(`Channel_2`)))+
      facet_grid(~cell_group)+
      labs(title = plot_data$condition)
  }
  p1


  ## log transformation after contamination correction
  plot_data.new <- plot_data
  base.oi <- 10
  plot_data.new[plot_data.new <= base.oi] <- base.oi
  for (i in paste0("Channel_", 2:3)){
    plot_data.new[, i] <- log(plot_data.new[, i], base = base.oi)
  }


  ggplot() +
    geom_point(plot_data.new, mapping = aes(x=(`Channel_3`), y=(`Channel_2`), col=replicate), alpha = 0.5,) +
    facet_grid(~cell_group)+
    geom_smooth(plot_data.new[plot_data.new$cell_group != "WT-C3H10 cells",], mapping = aes(x=(`Channel_3`), y=(`Channel_2`)))+
    facet_grid(~cell_group)+
    labs(title = plot_data.new$condition)


  # if plot WT and TF cells together
  plot_data.sub <- plot_data.new[plot_data.new$cell_group != "mCherry-noDox cells",]
  plot_data.sub <- plot_data.sub[plot_data.sub$Channel_3 >= 1.75, ] # exclude remaining outliers after checking the images
  plot_data.sub$cell_group |> table() |> print()
  cell_group <- names(table(plot_data.sub$cell_group))
  cell_count <- table(plot_data.sub$cell_group)
  p1 <- ggplot() +
    geom_point(plot_data.sub, mapping = aes(x=(`Channel_3`), y=(`Channel_2`), col=cell_group), alpha = 0.8) +
    scale_color_manual(values = c("#F3766D","#D3D3D3"))+
    geom_smooth(plot_data.sub, mapping = aes(x=(`Channel_3`), y=(`Channel_2`)), span = 0.8,)+
    labs(title = plot_data.sub$condition, subtitle = paste0(cell_group[1], "=", cell_count[1], "cells;", cell_group[3], "=", cell_count[3]))+
    theme_cowplot()+
    scale_x_continuous(breaks = seq(1, 4, by = 0.5), labels = c(1, 1.5, 2, 2.5,  3, 3.5,  4))+
    scale_y_continuous(breaks = seq(1, 4, by = 0.5), labels = c(1, 1.5, 2, 2.5,  3, 3.5,  4))
  p1

  Output_name <- gsub(";","_",condition.oi)
  Output_name <- gsub("=","-", Output_name)
  ggsave(p1, filename = paste0("figures/",Output_name, ".pdf"), height = 4, width = 6)