-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSentiment_Utils.py
More file actions
293 lines (235 loc) · 11.9 KB
/
Sentiment_Utils.py
File metadata and controls
293 lines (235 loc) · 11.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
from alpha_vantage.timeseries import TimeSeries
import json
import requests
# Standard Libraries
from datetime import timedelta, datetime
import matplotlib.pyplot as plt
import praw
from textblob import TextBlob
from collections import deque, defaultdict
def alpha_load_api_key():
    """Read the Alpha Vantage API key from ``alpha_secret.json``.

    Returns:
        str: the value stored under the ``"key"`` field.
    """
    with open('alpha_secret.json') as secrets_file:
        return json.load(secrets_file)['key']
def reddit_load_api_key():
    """Read Reddit API credentials from ``reddit_secret.json``.

    Returns:
        tuple: ``(client_id, client_secret, user_agent)``.
    """
    with open('reddit_secret.json') as secrets_file:
        creds = json.load(secrets_file)
    return creds['client_id'], creds['client_secret'], creds['user_agent']
def load_twitter_api_keys():
    """Read Twitter API credentials from ``twitter_secret.json``.

    Returns:
        tuple: ``(client_id, client_secret, app_id, access_token,
        access_token_secret)``.
    """
    with open('twitter_secret.json') as secrets_file:
        creds = json.load(secrets_file)
    field_order = ('client_id', 'client_secret', 'app_id',
                   'access_token', 'access_token_secret')
    return tuple(creds[name] for name in field_order)
# Credentials are loaded once at import time; importing this module therefore
# requires the three *_secret.json files to exist in the working directory.
alpha_api_key = alpha_load_api_key()
reddit_api_key = reddit_load_api_key()
twitter_api_key = load_twitter_api_keys()
# Per-ticker history of (date, net_value) entries, filled by
# calculate_net_institutional_trading and read by the visualize_* helpers.
net_institutional_trading = defaultdict(deque)
def alpha_get_news_sentiment(tickers=None, topics=None, time_from=None, time_to=None, sort='LATEST', limit=50):
    """Fetch the Alpha Vantage NEWS_SENTIMENT feed.

    Parameters:
        tickers (str): comma-separated ticker filter, or None for all.
        topics (str): comma-separated topic filter, or None for all.
        time_from / time_to (str): time-range bounds, or None for unbounded.
        sort (str): result ordering (Alpha Vantage default 'LATEST').
        limit (int): maximum number of articles to return.

    Returns:
        dict: decoded JSON response on HTTP 200, otherwise an error string
        (kept as-is for backward compatibility with existing callers).
    """
    url = 'https://www.alphavantage.co/query?function=NEWS_SENTIMENT'
    params = {
        'apikey': alpha_api_key,
        'tickers': tickers,
        'topics': topics,
        'time_from': time_from,
        'time_to': time_to,
        'sort': sort,
        'limit': limit,
    }
    # Fix: a timeout keeps the call from hanging indefinitely on a stalled
    # connection; requests itself drops params whose value is None.
    response = requests.get(url, params=params, timeout=30)
    if response.status_code == 200:
        return response.json()  # Return the JSON data as a dictionary
    return f"Error fetching data: {response.status_code}"
def alpha_get_top_gainers_losers():
    """Fetch Alpha Vantage's TOP_GAINERS_LOSERS snapshot.

    Returns:
        dict: decoded JSON response (on an API error Alpha Vantage returns a
        JSON body describing the error, which is passed through unchanged).
    """
    url = 'https://www.alphavantage.co/query?function=TOP_GAINERS_LOSERS'
    params = {'apikey': alpha_api_key}
    # Fix: add a timeout so a stalled connection cannot block forever.
    response = requests.get(url, params=params, timeout=30)
    return response.json()
def calculate_time_weight(time_published, current_time, max_days=30):
    """Linearly decaying recency weight for a news item.

    Parameters:
        time_published (str): timestamp formatted ``%Y%m%dT%H%M%S``.
        current_time (datetime): reference "now".
        max_days (int): age at which the weight reaches zero.

    Returns:
        float/int: 1.0 for brand-new items, decaying to 0 at ``max_days``;
        0 when the timestamp cannot be parsed.
    """
    try:
        published = datetime.strptime(time_published, '%Y%m%dT%H%M%S')
    except ValueError:
        # Malformed timestamp: the item contributes nothing.
        return 0
    age_days = (current_time - published).days
    remaining = max_days - age_days
    return remaining / max_days if remaining > 0 else 0
def alpha_extract_and_calculate_sentiment(ticker, response_dict):
    """Aggregate a relevance- and recency-weighted sentiment for *ticker*.

    Parameters:
        ticker (str): symbol to extract from each article's ticker_sentiment.
        response_dict (dict): NEWS_SENTIMENT payload with a 'feed' list.

    Returns:
        dict: {'overall_sentiment_score', 'overall_sentiment_label'} when at
        least one relevant article is found, otherwise an explanatory string.
    """
    now = datetime.utcnow()
    weighted_sum = 0
    weight_sum = 0
    for article in response_dict.get('feed', []):
        recency = calculate_time_weight(article.get('time_published', ''), now)
        for entry in article.get("ticker_sentiment", []):
            if entry["ticker"] != ticker:
                continue
            score = float(entry["ticker_sentiment_score"])
            relevance = float(entry["relevance_score"])
            weight = relevance * recency
            weighted_sum += score * weight
            weight_sum += weight
    if weight_sum == 0:
        return "No relevant news found for the ticker."
    overall_score = weighted_sum / weight_sum
    # Label thresholds mirror Alpha Vantage's published sentiment buckets.
    if overall_score <= -0.35:
        sentiment_label = "Bearish"
    elif overall_score <= -0.15:
        sentiment_label = "Somewhat-Bearish"
    elif overall_score >= 0.35:
        sentiment_label = "Bullish"
    elif overall_score >= 0.15:
        sentiment_label = "Somewhat-Bullish"
    else:
        sentiment_label = "Neutral"
    return {"overall_sentiment_score": overall_score, "overall_sentiment_label": sentiment_label}
def weighted_reddit_sentiment_analysis(subreddit_name, ticker, time_frame_days=30, post_limit=30, comment_limit=20, min_upvotes=10):
    """Weighted TextBlob sentiment for *ticker* in one subreddit.

    Searches the subreddit for the ticker, scores post titles and top-level
    comment bodies with TextBlob polarity, and weights each score by
    engagement (upvotes, comment count), recency (age in hours) and author
    karma.

    Parameters:
        subreddit_name (str): subreddit to search.
        ticker (str): search query / stock symbol.
        time_frame_days (int): only posts newer than this many days count.
        post_limit (int): maximum posts fetched from the search.
        comment_limit (int): maximum comments scored per post.
        min_upvotes (int): posts below this score are skipped.

    Returns:
        float: weighted average polarity in roughly [-1, 1]; 0 when nothing
        qualified.
    """
    client_id, client_secret, user_agent = reddit_load_api_key()
    reddit = praw.Reddit(client_id=client_id, client_secret=client_secret, user_agent=user_agent)
    subreddit = reddit.subreddit(subreddit_name)
    end_date = datetime.utcnow()
    start_date = end_date - timedelta(days=time_frame_days)
    total_weighted_sentiment = 0
    total_weight = 0
    for post in subreddit.search(f"{ticker}", sort='new', time_filter='all', limit=post_limit):
        post_date = datetime.utcfromtimestamp(post.created_utc)
        if start_date <= post_date <= end_date and post.score >= min_upvotes:
            # NOTE(review): counted before replace_more(), so this reflects the
            # initially loaded comment forest, not the full comment count.
            post_comments = len(post.comments)
            post_age = (end_date - post_date).total_seconds() / 3600  # in hours
            # Attempt to retrieve the user's karma
            try:
                post_karma = sum(post.author.karma().values()) if post.author else 0
            except AttributeError:
                post_karma = 0
            # Post engagement and user reputation weight:
            # more upvotes/comments and higher karma raise the weight,
            # older posts (larger age) lower it; +1 guards against zero.
            post_weight = (post.score * post_comments) / (post_age + 1) * (post_karma + 1)
            post_analysis = TextBlob(post.title)
            post_sentiment_score = post_analysis.sentiment.polarity
            total_weighted_sentiment += post_sentiment_score * post_weight
            total_weight += post_weight
            # Analyze top comments (limit=0 drops "load more" placeholders
            # without fetching additional pages)
            post.comments.replace_more(limit=0)
            for comment in post.comments[:comment_limit]:
                comment_age = (end_date - datetime.utcfromtimestamp(comment.created_utc)).total_seconds() / 3600  # in hours
                # Attempt to retrieve the user's karma
                try:
                    comment_karma = sum(comment.author.karma().values()) if comment.author else 0
                except AttributeError:
                    comment_karma = 0
                # Comment engagement and user reputation weight
                comment_weight = (comment.score / (comment_age + 1)) * (comment_karma + 1)
                comment_analysis = TextBlob(comment.body)
                comment_sentiment_score = comment_analysis.sentiment.polarity
                total_weighted_sentiment += comment_sentiment_score * comment_weight
                total_weight += comment_weight
    # Guard against a zero denominator when nothing matched the filters.
    average_weighted_sentiment = total_weighted_sentiment / total_weight if total_weight > 0 else 0
    return average_weighted_sentiment
def aggregate_subreddit_sentiment(subreddits, ticker):
    """Average the weighted Reddit sentiment for *ticker* across subreddits.

    Parameters:
        subreddits (list[str]): subreddit names to poll.
        ticker (str): stock symbol to search for.

    Returns:
        dict: {'overall_sentiment_score', 'overall_sentiment_label'}; the
        score is 0 (Neutral) when the subreddit list is empty.
    """
    if subreddits:
        scores = [weighted_reddit_sentiment_analysis(name, ticker) for name in subreddits]
        overall_sentiment = sum(scores) / len(subreddits)
    else:
        overall_sentiment = 0
    # Same bucket thresholds as the Alpha Vantage news sentiment labels.
    if overall_sentiment <= -0.35:
        sentiment_label = "Bearish"
    elif overall_sentiment <= -0.15:
        sentiment_label = "Somewhat-Bearish"
    elif overall_sentiment >= 0.35:
        sentiment_label = "Bullish"
    elif overall_sentiment >= 0.15:
        sentiment_label = "Somewhat-Bullish"
    else:
        sentiment_label = "Neutral"
    return {"overall_sentiment_score": overall_sentiment, "overall_sentiment_label": sentiment_label}
def get_intraday_stock_data(symbol):
    """Download full-size 5-minute intraday bars for *symbol*.

    Returns:
        pandas.DataFrame: OHLCV frame with columns renamed to
        ['Open', 'High', 'Low', 'Close', 'Volume'].
    """
    client = TimeSeries(key=alpha_api_key, output_format='pandas')
    frame, _ = client.get_intraday(symbol=symbol, interval='5min', outputsize='full')
    # Replace Alpha Vantage's numbered column names with plain OHLCV labels.
    frame.columns = ['Open', 'High', 'Low', 'Close', 'Volume']
    return frame
def time_aggregated_block_trades(data, time_window='1min', block_size=10000):
    """Find the largest block trades after resampling to *time_window*.

    Parameters:
        data (pandas.DataFrame): OHLCV frame with a DatetimeIndex.
        time_window (str): pandas resample rule (default one minute).
        block_size (int): minimum aggregated Volume to count as a block trade.

    Returns:
        pandas.DataFrame: up to 10 largest-volume windows, with a
        'Price Impact' column (Close - Open per window).
    """
    # Resampling data to the desired time frame and summing volumes
    aggregated_data = data.resample(time_window).agg({'Open': 'first',
                                                      'Close': 'last',
                                                      'Volume': 'sum'})
    # Fix: .copy() so the 'Price Impact' assignment writes to an independent
    # frame rather than a view of aggregated_data (pandas chained-assignment
    # hazard / SettingWithCopyWarning).
    block_trades = aggregated_data[aggregated_data['Volume'] >= block_size].copy()
    block_trades['Price Impact'] = block_trades['Close'] - block_trades['Open']
    # Sorting by Volume and getting the top 10 largest trades
    return block_trades.nlargest(10, 'Volume')
def detect_volume_anomalies(data, std_factor=3):
    """Rows whose Volume deviates more than *std_factor* std-devs from the mean.

    Parameters:
        data (pandas.DataFrame): frame containing a 'Volume' column.
        std_factor (int): width of the acceptance band in standard deviations.

    Returns:
        pandas.DataFrame: up to 10 highest-volume rows outside the band.
    """
    volume = data['Volume']
    center = volume.mean()
    spread = volume.std()
    upper_bound = center + std_factor * spread
    lower_bound = center - std_factor * spread
    outliers = data[(volume > upper_bound) | (volume < lower_bound)]
    # Keep only the ten largest anomalies by traded volume.
    return outliers.nlargest(10, 'Volume')
def highlight_key_info(data):
    """Project the frame down to its Open, Close and Volume columns."""
    key_columns = ['Open', 'Close', 'Volume']
    return data[key_columns]
def weighted_volume_sentiment_analysis(data):
    """Classify overall direction from volume-and-price-impact weighted bars.

    Each row votes +1 (Close > Open) or -1, weighted by Volume times the
    absolute price move.

    Parameters:
        data (pandas.DataFrame): frame with 'Open', 'Close', 'Volume' columns.

    Returns:
        str: "Bullish", "Bearish", or "Neutral".
    """
    # Fix: compute weights/votes locally instead of writing 'Weight' and
    # 'Sentiment' columns into the caller's DataFrame (unwanted mutation).
    weights = data['Volume'] * (data['Close'] - data['Open']).abs()
    votes = data.apply(lambda row: 1 if row['Close'] > row['Open'] else -1, axis=1)
    total_weight = weights.sum()
    # Fix: previously a ZeroDivisionError when no bar moved (all weights 0);
    # no price movement carries no directional signal.
    if total_weight == 0:
        return "Neutral"
    weighted_sentiment = (weights * votes).sum() / total_weight
    # Categorizing the sentiment
    if weighted_sentiment > 0:
        return "Bullish"
    elif weighted_sentiment < 0:
        return "Bearish"
    else:
        return "Neutral"
def calculate_net_institutional_trading(block_trades, date, ticker, net_institutional_trading):
    """
    Calculates and stores the net institutional trading for a given ticker on a specific date.

    Parameters:
        block_trades (DataFrame): DataFrame containing block trade data
            (DatetimeIndex with 'Open', 'Close', 'Volume' columns).
        date (datetime.date): The date for which to calculate net institutional trading.
        ticker (str): Ticker symbol of the stock.
        net_institutional_trading (dict): Dictionary to store the net institutional trading values.

    Returns:
        float: Net institutional trading value for the given date
            (sum of (Close - Open) * Volume over that date's rows).
    """
    # Filter the block trades for the given date.
    # Fix: .copy() so the derived columns below are written to an independent
    # frame, not a view of block_trades (pandas chained-assignment hazard).
    filtered_trades = block_trades[block_trades.index.date == date].copy()
    # Net price move per window, scaled by volume as a proxy for money flow
    filtered_trades['Net'] = filtered_trades['Close'] - filtered_trades['Open']
    filtered_trades['Institutional Trading'] = filtered_trades['Net'] * filtered_trades['Volume']
    # Calculate the net institutional trading for the day
    net_value = filtered_trades['Institutional Trading'].sum()
    # Store the net value; setdefault covers both a first-seen ticker (new
    # list) and an existing entry (append), matching the old if/else.
    net_institutional_trading.setdefault(ticker, []).append((date, net_value))
    return net_value
def visualize_net_institutional_trading_today():
    """Bar-chart the most recent net institutional trading value per ticker.

    Reads the module-level ``net_institutional_trading`` store and ``symbols``
    list; green bars are net buying, red bars net selling. Each ticker must
    already have at least one recorded entry (via
    ``calculate_net_institutional_trading``) or the [-1] lookup raises.
    """
    # Create a bar graph of the net institutional trading for each ticker for today
    # NOTE(review): "[-1]" takes the last recorded entry per ticker, which is
    # "today" only if the store was populated with today's data — confirm.
    tickers, trading = zip(*[(ticker, net_institutional_trading[ticker][-1][1]) for ticker in symbols])
    colors = ['g' if x > 0 else 'r' for x in trading]
    plt.bar(tickers, trading, color=colors)
    plt.xlabel('Ticker')
    plt.ylabel('Net Institutional Trading')
    plt.title('Net Institutional Trading for Today')
    plt.show()
def visualize_net_institutional_trading_5_days():
    """Bar-chart each ticker's net institutional trading over the most
    recent 5 recorded entries.

    Reads the module-level ``net_institutional_trading`` store and ``symbols``
    list; green bars are net buying, red bars net selling.
    """
    # Fix: previously the ENTIRE recorded history was summed, contradicting
    # the "Most Recent 5 Days" title; take only the last 5 (date, value)
    # entries per ticker. list() is needed because deques don't slice.
    tickers, trading = zip(*[
        (ticker, sum(value for _, value in list(net_institutional_trading[ticker])[-5:]))
        for ticker in symbols
    ])
    colors = ['g' if x > 0 else 'r' for x in trading]
    plt.bar(tickers, trading, color=colors)
    plt.xlabel('Ticker')
    plt.ylabel('Net Institutional Trading')
    plt.title('Net Institutional Trading for the Most Recent 5 Days')
    plt.show()
# Tickers tracked by the visualization helpers above.
symbols = ['SPY', 'MSFT', 'AAPL', 'AMZN', 'NVDA', 'GOOGL', 'META', 'GOOG', 'BRK-B', 'TSLA', 'UNH']
if __name__ == '__main__':
    # Manual smoke test: fetches live intraday data for AAPL and prints the
    # result types of the block-trade and volume-anomaly pipelines.
    subreddits = ['wallstreetbets', 'stocks']
    ticker = 'AAPL'
    #print(alpha_extract_and_calculate_sentiment(ticker, alpha_get_news_sentiment(ticker)))
    #print(aggregate_subreddit_sentiment(subreddits, ticker))
    print(type(time_aggregated_block_trades(get_intraday_stock_data(ticker))))
    print(type(detect_volume_anomalies(get_intraday_stock_data(ticker))))