-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTwitter API Tweets extraction R Code.R
More file actions
59 lines (46 loc) · 2.3 KB
/
Twitter API Tweets extraction R Code.R
File metadata and controls
59 lines (46 loc) · 2.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
##############################
# R has a dedicated package twitteR
# For additional functions, refer to twitteR.pdf (the twitteR package manual)
# Sample code for getting twitter data -
#
# Install any dependency that is not yet available, then attach both.
# The previous `require(pkg) || install.packages(pkg)` form raised an error
# right after installing, because install.packages() returns NULL and NULL is
# not a valid operand for `||`.
for (pkg in c("twitteR", "base64enc")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
# library() stops with an error if a package is still unavailable.
library(twitteR)
library(base64enc)
#############################################
# Authentication
#############################################
# options(httr_oauth_cache=T)
# Twitter API credentials. Each value is taken from an environment variable
# when one is set, so secrets do not have to be stored in this script; the
# `unset` fallbacks are the placeholder values the script originally carried.
api_key <- Sys.getenv("TWITTER_API_KEY", unset = "") # Consumer key
api_secret <- Sys.getenv("TWITTER_API_SECRET", unset = "") # Consumer secret
access_token <- Sys.getenv("TWITTER_ACCESS_TOKEN", unset = "182265101-") # Access token
access_token_secret <- Sys.getenv("TWITTER_ACCESS_TOKEN_SECRET", unset = "") # Access token secret

# Report blank credentials by name before attempting to authenticate with them.
credentials <- c(
  api_key = api_key,
  api_secret = api_secret,
  access_token = access_token,
  access_token_secret = access_token_secret
)
missing_credentials <- names(credentials)[!nzchar(credentials)]
if (length(missing_credentials) > 0) {
  stop(
    "Missing Twitter credential(s): ",
    paste(missing_credentials, collapse = ", "),
    call. = FALSE
  )
}

# After this command runs, type 1 (Yes) at the prompt shown below.
setup_twitter_oauth(api_key, api_secret, access_token, access_token_secret)
#[1] "Using direct authentication"
#Use a local file to cache OAuth access credentials between R sessions?
#1: Yes
#2: No
#Selection: 1
#############################################
# Extract Tweets
#############################################
# Clean raw tweet text: drop retweet/via prefixes, @mentions, punctuation,
# digits, links and non-ASCII characters, then normalise whitespace.
#
# text: character vector of raw tweet texts.
# Returns a character vector of the same length with the cleaned text.
clean_tweet_text <- function(text) {
  text <- gsub("(RT|via)((?:\\b\\W*@\\w+)+)", "", text) # "RT @user" / "via @user"
  text <- gsub("@\\w+", "", text) # remaining @user mentions
  text <- gsub("[[:punct:]]", "", text) # punctuation marks
  text <- gsub("[[:digit:]]", "", text) # numbers
  # Deliberately after the punctuation/digit steps: by now a URL has been
  # squashed into one "http..." word, which this pattern removes whole.
  text <- gsub("http\\w+", "", text)
  text <- gsub("\n", " ", text) # new lines become spaces
  text <- gsub("[^[:alnum:]///' ]", " ", text) # keep only alphanumerics
  text <- iconv(text, "latin1", "ASCII", sub = "") # keep only ASCII characters
  # Collapse runs of blanks last: the replacements above can leave several
  # spaces side by side, which an earlier collapse would miss.
  text <- gsub("[ \t]{2,}", " ", text)
  gsub("^\\s+|\\s+$", "", text) # trim leading and trailing white space
}

hashtags <- c("#GST")
for (hashtag in hashtags) {
  # Hashtag to search for and the number of tweets to request.
  tweets <- searchTwitter(hashtag, n = 1000)
  if (length(tweets) == 0) {
    # An empty result has nothing to clean or save; skip it so the remaining
    # hashtags are still processed.
    warning("No tweets found for ", hashtag, call. = FALSE)
    next
  }
  tweets <- twListToDF(tweets) # Convert from list to data frame
  tweets.df <- clean_tweet_text(tweets[, 1]) # clean the first column of the data frame
  tweets[, 1] <- tweets.df # save the cleaned text back in the data frame
  # Values are not auto-printed inside a for loop, so print the preview
  # explicitly.
  print(head(tweets))
  # One CSV per hashtag, named after the hashtag without the leading '#'.
  write.csv(tweets, paste0(gsub("#", "", hashtag), ".csv"))
}