forked from aavanesy/RParallelCompute
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathLecture8.R
More file actions
59 lines (44 loc) · 1.25 KB
/
Lecture8.R
File metadata and controls
59 lines (44 loc) · 1.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# 8. Coding Session: Sequential Script Execution
library(dplyr); library(tidyr); library(tibble);
library(tidytext); library(rvest);
library(tictoc)
# Single process calculations
# Scrape one page and tally its most frequent bigrams.
#
# Args:
#   url: handed directly to read_html().
#
# Returns:
#   A tibble with columns `bigram` (two words joined by a single space) and
#   `n` (number of occurrences), ordered by descending `n` and restricted to
#   bigrams seen at least 5 times. Bigrams in which either word appears in
#   `stop_words$word` are excluded before counting.
get_ngramms <- function(url){
  # Text of every <p> element, one row per paragraph.
  paragraph_tbl <- html_text(html_elements(read_html(url), "p")) %>%
    enframe(name = "line", value = "text") %>%
    drop_na()

  # One row per two-word sequence, split into its component words.
  word_pairs <- paragraph_tbl %>%
    unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
    filter(!is.na(bigram)) %>%
    separate(bigram, c("word1", "word2"), sep = " ")

  # Drop pairs containing a stop word, then count what is left.
  pair_counts <- word_pairs %>%
    filter(
      !word1 %in% stop_words$word,
      !word2 %in% stop_words$word
    ) %>%
    count(word1, word2, sort = TRUE)

  # Re-join the words and keep only the frequent bigrams.
  pair_counts %>%
    unite(bigram, word1, word2, sep = " ") %>%
    filter(n >= 5)
}
## All countries
# First column of country_links.csv, limited to its first 10 entries.
country_links <- head(
  pull(read.csv("country_links.csv"), 1),
  10
)
## Using Loop ----
# Process the links one at a time and keep every page's result.
# seq_along() visits exactly the links that are present, so a vector with
# fewer than 10 entries no longer passes NA to get_ngramms().
res_loop <- vector("list", length(country_links))
for (i in seq_along(country_links)) {
  print(i)
  # res_i still holds the most recent result, as before.
  res_i <- get_ngramms(country_links[i])
  res_loop[[i]] <- res_i
}
# Using Base R and Apply Scripts ----
# Time a sequential lapply() over at most the first 10 links.
# head() is used instead of [1:10] because indexing past the end of a shorter
# vector pads it with NA, which get_ngramms() would then try to fetch.
tic()
res_apply <- lapply(head(country_links, 10), get_ngramms)
toc()
# Using purrr and map ----
# Time a sequential purrr::map() over at most the first 10 links.
# head() is used instead of [1:10] because indexing past the end of a shorter
# vector pads it with NA, which get_ngramms() would then try to fetch.
tic()
res_map <- purrr::map(.x = head(country_links, 10), .f = get_ngramms)
toc()
# When to use apply and map? ----
# extra functions
# Open the help page for group_map().
?group_map
# Fit one linear model per iris species and summarise each fit with
# broom::tidy(); group_map() passes the group's rows as the first argument
# and the group key as the second.
group_map(
  group_by(iris, Species),
  function(.x, .y) broom::tidy(lm(Petal.Length ~ Sepal.Length, data = .x))
)