-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathapp.py
More file actions
193 lines (160 loc) · 7.99 KB
/
app.py
File metadata and controls
193 lines (160 loc) · 7.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
# Author: Jivan Jamdar
# Date: 2023-10-01 Time: 20:57:43PM
# Description: Streamlit app for sentiment analysis using TextBlob and VADER
import streamlit as st
import pandas as pd
import plotly.express as px
from analyzer import SentimentAnalyzer
def main():
st.set_page_config(
page_title="Sentiment Analysis App",
page_icon="📊",
layout="wide"
)
st.title("📊 Sentiment Analysis Tool")
st.write("Analyze the sentiment of tweets, product reviews, or any text!")
analyzer = SentimentAnalyzer()
# different tabs => different functionalities
tab1, tab2, tab3 = st.tabs(["Single Text Analysis", "Batch Analysis", "About"])
with tab1:
st.subheader("Single Text Analysis")
# tool selection
analysis_tool = st.radio(
"Select analysis tool:",
["TextBlob", "VADER"],
horizontal=True
)
# text input
text_input = st.text_area(
"Enter text to analyze:",
height=150,
placeholder="Type or paste your text here..."
)
if st.button("Analyze Sentiment"):
if text_input:
with st.spinner("Analyzing..."):
if analysis_tool == "TextBlob":
result = analyzer.analyze_textblob(text_input)
method = "TextBlob"
else:
result = analyzer.analyze_vader(text_input)
method = "VADER"
# display results with columns
st.subheader("Results")
col1, col2 = st.columns(2)
with col1:
st.markdown(f"### {result['emoji']} {result['sentiment']}")
st.write(f"Analysis method: {method}")
with col2:
if method == "TextBlob":
st.metric("Polarity", f"{result['polarity']:.2f}")
st.metric("Subjectivity", f"{result['subjectivity']:.2f}")
else: # VADER
st.metric("Compound Score", f"{result['compound']:.2f}")
# detailed breakdown for VADER
if method == "VADER":
st.subheader("Sentiment Breakdown")
vader_df = pd.DataFrame({
'Sentiment': ['Positive', 'Neutral', 'Negative'],
'Score': [result['pos'], result['neu'], result['neg']]
})
fig = px.bar(
vader_df,
x='Sentiment',
y='Score',
color='Sentiment',
color_discrete_map={
'Positive': '#2ECC71',
'Neutral': '#3498DB',
'Negative': '#E74C3C'
}
)
st.plotly_chart(fig, use_container_width=True)
else:
st.warning("Please enter some text to analyze.")
with tab2:
st.subheader("Batch Analysis")
st.write("Upload a CSV or Excel file with a column containing text to analyze multiple entries at once.")
uploaded_file = st.file_uploader("Upload your file", type=["csv", "xlsx"])
if uploaded_file is not None:
try:
if uploaded_file.name.endswith('.csv'):
df = pd.read_csv(uploaded_file)
else:
df = pd.read_excel(uploaded_file)
st.write("Preview of uploaded data:")
st.dataframe(df.head())
text_column = st.selectbox("Select the column containing text to analyze:", df.columns)
analysis_tool = st.radio(
"Select analysis tool for batch processing:",
["TextBlob", "VADER"],
horizontal=True
)
if st.button("Run Batch Analysis"):
with st.spinner("Analyzing all entries..."):
results = []
for text in df[text_column]:
if pd.notna(text): # skip NaN values
if analysis_tool == "TextBlob":
result = analyzer.analyze_textblob(str(text))
else:
result = analyzer.analyze_vader(str(text))
results.append(result)
else:
# handle NaN values
results.append({
'sentiment': 'Unknown',
'emoji': '❓',
'compound' if analysis_tool == "VADER" else 'polarity': 0
})
# create results DataFrame
results_df = pd.DataFrame(results)
df_with_sentiment = pd.concat([df, results_df], axis=1)
st.subheader("Results")
st.dataframe(df_with_sentiment)
# download button for results
st.download_button(
label="Download Results",
data=df_with_sentiment.to_csv(index=False),
file_name="sentiment_analysis_results.csv",
mime="text/csv"
)
# show summary statistics
st.subheader("Sentiment Distribution")
sentiment_counts = results_df['sentiment'].value_counts().reset_index()
sentiment_counts.columns = ['Sentiment', 'Count']
fig = px.pie(
sentiment_counts,
names='Sentiment',
values='Count',
color='Sentiment',
color_discrete_map={
'Positive': '#2ECC71',
'Neutral': '#3498DB',
'Negative': '#E74C3C',
'Unknown': '#95A5A6'
}
)
st.plotly_chart(fig, use_container_width=True)
except Exception as e:
st.error(f"Error processing the uploaded file: {str(e)}")
with tab3:
st.subheader("About This App")
st.write("""
This Sentiment Analysis App uses two popular techniques:
1. **TextBlob**: A simple NLP library that provides a simple API for diving into common NLP tasks.
- Polarity: Score from -1 (very negative) to +1 (very positive)
- Subjectivity: Score from 0 (objective) to 1 (subjective)
2. **VADER** (Valence Aware Dictionary and sEntiment Reasoner): A lexicon and rule-based sentiment analysis tool specifically attuned to sentiments expressed in social media.
- Compound: Normalized score from -1 (very negative) to +1 (very positive)
- Positive, Neutral, Negative: Proportions of text that fall in each category
### When to use each tool:
- TextBlob is simpler and works well for general text
- VADER is better for social media content, slang, and emoticons
### Limitations:
- Neither tool understands sarcasm well
- Context is often missed
- Language specific (works best with English)
""")
if __name__ == "__main__":
main()