-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsent_ui.r
More file actions
277 lines (200 loc) · 9.95 KB
/
sent_ui.r
File metadata and controls
277 lines (200 loc) · 9.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
# Load the required libraries
library(shiny)
library(ggplot2)
# Define the UI.
# Layout: a sidebar for inputs (CSV upload, stopword toggle, run button) and a
# main panel showing the emotion distribution plot, confusion matrix, metrics,
# and the per-line sentiment table produced by the server.
ui <- fluidPage(
  titlePanel("Sentiment Analysis"),
  sidebarLayout(
    sidebarPanel(
      # The CSV is expected to carry the raw text in column 1 and a
      # 'sentiment' column with "positive"/"negative" labels.
      fileInput("file", "Choose CSV File"),
      checkboxInput("removeStopwords", "Remove Stopwords", value = TRUE),
      # Trailing comma removed: an empty trailing argument inside
      # sidebarPanel(...) raises an "argument is empty" error when the
      # tag list is evaluated.
      actionButton("runAnalysis", "Run Analysis")
    ),
    mainPanel(
      plotOutput("emotionPlot"),
      tableOutput("confusionMatrix"),
      tableOutput("metrics"),
      tableOutput("outputTable")
    )
  )
)
# Define the server.
#
# On "Run Analysis": reads the uploaded CSV (text in column 1, labels in a
# 'sentiment' column), cleans the text, splits 70/20/10 into train/test/verify,
# trains a naive Bayes classifier on the training TF-IDF matrix, predicts the
# test split, and renders a sentiment-count plot, a confusion matrix, summary
# metrics, and a per-line prediction table.
server <- function(input, output) {
  # Reactive: the user-supplied CSV, re-read whenever a new file is chosen.
  data <- reactive({
    req(input$file)
    read.csv(input$file$datapath)
  })

  # Perform sentiment analysis when the button is pressed.
  observeEvent(input$runAnalysis, {
    # Analysis-only dependencies, loaded lazily on first run.
    library(caret)
    library(tm)
    library(Matrix)
    library(e1071)

    # Keep only rows with a definite positive/negative label.
    df <- data()
    df <- subset(df, df$sentiment == "positive" | df$sentiment == "negative")

    # ---- Text cleaning -------------------------------------------------
    text_column <- df[, 1]
    # Remove punctuation and other special characters.
    text_column <- gsub("[^a-zA-Z0-9 ]", "", text_column)
    # Remove leading and trailing whitespace.
    text_column <- trimws(text_column)
    # Honour the "Remove Stopwords" checkbox (previously read by the UI but
    # ignored here).
    remove_sw <- isTRUE(input$removeStopwords)
    if (remove_sw) {
      text_column <- removeWords(text_column, stopwords("english"))
    }
    text_column <- tolower(text_column)
    # Write the cleaned text back into the data frame.
    df[, 2] <- text_column

    set.seed(123)
    # Split the data: 70% training, 20% testing, remaining ~10% verification.
    # (Original comments said 60%/30%, contradicting the code.)
    n <- nrow(df)
    train_indices <- sample(seq_len(n), 0.7 * n)
    test_indices <- sample(setdiff(seq_len(n), train_indices), 0.2 * n)
    verify_indices <- setdiff(seq_len(n), c(train_indices, test_indices))
    train_data <- df[train_indices, ]
    # Drop NA rows BEFORE predicting so predicted_sentiment stays row-aligned
    # with test_data (the original called na.omit() after prediction, which
    # misaligned predictions with the actual labels and the output table).
    test_data <- na.omit(df[test_indices, ])
    verify_data <- df[verify_indices, ]

    # ---- Training TF-IDF ----------------------------------------------
    cleaned_text <- train_data[, 1]
    corpus <- Corpus(VectorSource(cleaned_text))
    corpus <- tm_map(corpus, content_transformer(tolower))
    corpus <- tm_map(corpus, removePunctuation)
    corpus <- tm_map(corpus, removeNumbers)
    if (remove_sw) {
      corpus <- tm_map(corpus, removeWords, stopwords("english"))
    }
    dtm <- DocumentTermMatrix(corpus)
    tfidf <- weightTfIdf(dtm)

    # ---- Train the naive Bayes classifier -----------------------------
    tfidf_df <- as.data.frame(as.matrix(tfidf))
    # Use the TRUE labels from the training split. The original cycled
    # rep(c("positive", "negative"), length.out = nrow) over the rows, so the
    # model was fitted against an alternating pattern unrelated to the data.
    tfidf_df$Sentiment <- factor(train_data$sentiment)
    naive_model <- naiveBayes(Sentiment ~ ., data = tfidf_df, laplace = 1)

    # ---- Test-set preprocessing (same pipeline as training) -----------
    test_text <- test_data[, 1]
    test_text <- tolower(test_text)
    test_text <- gsub("[^a-zA-Z0-9 ]", "", test_text)
    test_text <- trimws(test_text)
    if (remove_sw) {
      test_text <- removeWords(test_text, stopwords("english"))
    }
    test_corpus <- Corpus(VectorSource(test_text))
    test_corpus <- tm_map(test_corpus, content_transformer(tolower))
    test_corpus <- tm_map(test_corpus, removePunctuation)
    test_corpus <- tm_map(test_corpus, removeNumbers)
    if (remove_sw) {
      test_corpus <- tm_map(test_corpus, removeWords, stopwords("english"))
    }
    test_dtm <- DocumentTermMatrix(test_corpus)
    test_tfidf <- weightTfIdf(test_dtm)
    test_tfidf_df <- as.data.frame(as.matrix(test_tfidf))

    # Predict sentiment labels for the test split.
    predicted_sentiment <- predict(naive_model, newdata = test_tfidf_df)

    # ---- Per-line output table ----------------------------------------
    # Bounded by the rows actually available (the original hard-coded
    # 1:100 and failed on smaller test splits); built in one step instead
    # of growing with rbind() in a loop.
    n_out <- min(nrow(test_data), length(predicted_sentiment))
    output_df <- data.frame(
      Line = test_data[seq_len(n_out), 1],
      Sentiment = as.character(predicted_sentiment[seq_len(n_out)]),
      stringsAsFactors = FALSE
    )

    # Ground-truth labels; assumes column 3 of the CSV holds the sentiment
    # label, as in the original code — confirm against the input format.
    actual_sentiments <- test_data[, 3]

    # ---- Emotions distribution plot -----------------------------------
    output$emotionPlot <- renderPlot({
      sentiment_counts <- table(predicted_sentiment)
      sentiment_data <- data.frame(
        Sentiment = names(sentiment_counts),
        Count = as.numeric(sentiment_counts)
      )
      ggplot(sentiment_data, aes(x = Sentiment, y = Count)) +
        geom_bar(stat = "identity", fill = "steelblue") +
        labs(x = "Sentiment", y = "Count") +
        ggtitle("Emotions Distribution") +
        theme_minimal()
    })

    # ---- Confusion matrix and metrics ---------------------------------
    # Align lengths and factor levels so caret::confusionMatrix does not
    # error when predictions and references disagree on label sets.
    n_eval <- min(length(predicted_sentiment), length(actual_sentiments))
    lvls <- union(unique(as.character(predicted_sentiment)),
                  unique(as.character(actual_sentiments)))
    confusion_matrix <- confusionMatrix(
      data = factor(predicted_sentiment[seq_len(n_eval)], levels = lvls),
      reference = factor(actual_sentiments[seq_len(n_eval)], levels = lvls)
    )

    # With alphabetical levels ("negative" < "positive"), cell [1, 1] is
    # true negatives and [2, 2] is true positives.
    true_negatives <- confusion_matrix$table[1, 1]
    false_negatives <- confusion_matrix$table[2, 1]
    true_positives <- confusion_matrix$table[2, 2]
    false_positives <- confusion_matrix$table[1, 2]

    accuracy <- sum(diag(confusion_matrix$table)) / sum(confusion_matrix$table)
    # Sensitivity (true positive rate).
    sensitivity <- true_positives / (true_positives + false_negatives)
    # Specificity (true negative rate).
    specificity <- true_negatives / (true_negatives + false_positives)
    # Precision (positive predictive value).
    precision <- true_positives / (true_positives + false_positives)
    # F1: harmonic mean of precision and sensitivity.
    f1_score <- 2 * (precision * sensitivity) / (precision + sensitivity)

    metrics <- data.frame(
      Metric = c("Accuracy", "Sensitivity", "Specificity", "Precision", "F1 Score"),
      Value = c(accuracy, sensitivity, specificity, precision, f1_score)
    )

    confusion_values <- as.data.frame(confusion_matrix$table)
    confusion_values$Metric <- rownames(confusion_values)

    # ---- Render the tables --------------------------------------------
    output$confusionMatrix <- renderTable({
      confusion_values
    })
    output$metrics <- renderTable({
      metrics
    })
    output$outputTable <- renderTable({
      output_df
    })
  })
}
# Launch the application with the UI and server defined above.
shinyApp(ui = ui, server = server)