-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenerate_plot.R
More file actions
140 lines (114 loc) · 4.6 KB
/
generate_plot.R
File metadata and controls
140 lines (114 loc) · 4.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
library(tidyverse)
hidden_states <- read_csv("./data/output/posterior/hidden_states.csv")
posterior <- read_csv("./data/output/posterior/exampleblck32.posterior.csv")
viterbi <- read_csv("./data/output/viterbi/exampleblck32.viterbi.csv")
topo_levels <- c("({sp1,sp2},sp3)",
"((sp1,sp2),sp3)",
"((sp1,sp3),sp2)",
"((sp2,sp3),sp1)")
post_block32 <- posterior %>%
mutate(position_idx = na_if(position_idx, -9)) %>%
drop_na(position_idx) %>%
pivot_longer(
cols = starts_with("prob_state_"),
names_to = "state_col",
values_to = "prob"
) %>%
mutate(state_idx = as.integer(readr::parse_number(state_col))) %>%
select(-state_col) %>%
left_join(hidden_states %>% transmute(state_idx = as.integer(state_idx), topology),
by = "state_idx") %>%
group_by(alignment_block_idx, position_idx, topology) %>%
summarise(prob = sum(prob, na.rm = TRUE), .groups = "drop") %>%
mutate(topology = factor(topology, levels = topo_levels)) %>%
pivot_wider(names_from = topology, values_from = prob) %>%
mutate(across(all_of(topo_levels), ~coalesce(.x, 0))) %>%
select(alignment_block_idx, position_idx, all_of(topo_levels)) %>%
arrange(alignment_block_idx, position_idx)
print(post_block32)
print(min(post_block32$position_idx))
print(max(post_block32$position_idx))
position_start <- min(post_block32$position_idx)
position_end <- max(post_block32$position_idx)
state_mapping <- hidden_states %>%
select(state_idx, topology)
post_long <- post_block32 %>%
pivot_longer(
cols = -c(alignment_block_idx, position_idx),
names_to = "topology",
values_to = "probability"
) %>%
select(-alignment_block_idx) %>%
mutate(
topology = str_replace_all(topology, "sp1", "Human"),
topology = str_replace_all(topology, "sp2", "Chimp"),
topology = str_replace_all(topology, "sp3", "Gorilla")
)
viterbi <- viterbi %>%
mutate(Block_idx = as.integer(Block_idx)) %>%
filter(Block_idx == 32) %>%
mutate(most_likely_state = as.numeric(most_likely_state)) %>%
left_join(state_mapping, by = c("most_likely_state" = "state_idx")) %>%
arrange(position_start) %>%
select(-Block_idx) %>%
select(-most_likely_state) %>%
mutate(
topology = str_replace_all(topology, "sp1", "Human"),
topology = str_replace_all(topology, "sp2", "Chimp"),
topology = str_replace_all(topology, "sp3", "Gorilla")
)
max_top <- post_long %>%
group_by(position_idx) %>%
slice_max(probability, n = 1, with_ties = TRUE) %>%
slice(1) %>%
ungroup() %>%
arrange(position_idx)
max_post <- max_top %>%
arrange(position_idx) %>%
mutate(group = cumsum(topology != lag(topology, default = first(topology)))) %>%
group_by(group, topology) %>%
summarize(
position_start = first(position_idx),
position_end = last(position_idx),
.groups = "drop"
) %>%
select(-group) %>%
mutate(
topology = str_replace_all(topology, "sp1", "Human"),
topology = str_replace_all(topology, "sp2", "Chimp"),
topology = str_replace_all(topology, "sp3", "Gorilla")
)
padding <- 0.025
top_vec <- c("({Human,Chimp},Gorilla)",
"((Human,Chimp),Gorilla)",
"((Human,Gorilla),Chimp)",
"((Chimp,Gorilla),Human)")
xlim_rng <- range(post_block32$position_idx, na.rm = TRUE)
p <- ggplot(post_long) +
geom_line(aes(x = position_idx, y = probability, color = topology)) +
geom_rect(aes(xmin = position_start, xmax = position_end,
ymin = 1.11 - padding, ymax = 1.11 + padding,
fill = topology, color = topology),
data = viterbi) +
geom_rect(aes(xmin = position_start, xmax = position_end,
ymin = 1.05 - padding, ymax = 1.05 + padding,
fill = topology, color = topology),
data = max_post) +
scale_color_brewer(palette = "Set1",
breaks = top_vec,
labels = top_vec) +
scale_fill_brewer(palette = "Set1",
breaks = top_vec,
labels = top_vec) +
scale_y_continuous(breaks = c(0, 0.25, 0.5, 0.75, 1, 1.05, 1.11),
labels = c(0, 0.25, 0.5, 0.75, 1, "MaxP", "Viterbi")) +
scale_x_continuous(expand = c(0, 0)) +
theme_bw() +
theme(panel.grid = element_blank(),
panel.border = element_rect(color = "black", fill = NA)) +
geom_hline(yintercept = c(0, 0.25, 0.5, 0.75, 1),
color = "grey80", linetype = "dashed") +
labs(x = "Position (bp)", y = "Posterior probability",
fill = "Genealogy", color = "Genealogy")
ggsave(filename = "./plot.png",
plot = p, width = 12, height = 6, dpi = 600)