-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSentiment_Utils.py
More file actions
293 lines (235 loc) · 11.9 KB
/
Sentiment_Utils.py
File metadata and controls
293 lines (235 loc) · 11.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
from alpha_vantage.timeseries import TimeSeries
import json
import requests
# Standard Libraries
from datetime import timedelta, datetime
import matplotlib.pyplot as plt
import praw
from textblob import TextBlob
from collections import deque, defaultdict
def alpha_load_api_key():
    """Read the Alpha Vantage API key from ``alpha_secret.json``.

    Returns:
        str: the value stored under the ``"key"`` field.
    """
    with open('alpha_secret.json') as secrets_file:
        return json.load(secrets_file)['key']
def reddit_load_api_key():
    """Read Reddit API credentials from ``reddit_secret.json``.

    Returns:
        tuple: ``(client_id, client_secret, user_agent)``.
    """
    with open('reddit_secret.json') as secrets_file:
        creds = json.load(secrets_file)
    return creds['client_id'], creds['client_secret'], creds['user_agent']
def load_twitter_api_keys():
    """Read Twitter API credentials from ``twitter_secret.json``.

    Returns:
        tuple: ``(client_id, client_secret, app_id, access_token,
        access_token_secret)``.
    """
    with open('twitter_secret.json') as secrets_file:
        creds = json.load(secrets_file)
    field_order = ('client_id', 'client_secret', 'app_id',
                   'access_token', 'access_token_secret')
    return tuple(creds[name] for name in field_order)
# Credentials are loaded once at import time; importing this module therefore
# requires the three *_secret.json files to exist in the working directory.
alpha_api_key = alpha_load_api_key()
reddit_api_key = reddit_load_api_key()
twitter_api_key = load_twitter_api_keys()
# Per-ticker history of (date, net_value) entries, filled by
# calculate_net_institutional_trading and read by the visualize_* helpers.
net_institutional_trading = defaultdict(deque)
def alpha_get_news_sentiment(tickers=None, topics=None, time_from=None, time_to=None, sort='LATEST', limit=50):
    """Fetch the Alpha Vantage NEWS_SENTIMENT feed.

    Parameters:
        tickers (str): comma-separated ticker filter, or None for all.
        topics (str): comma-separated topic filter, or None for all.
        time_from / time_to (str): time-range bounds, or None for unbounded.
        sort (str): result ordering (Alpha Vantage default 'LATEST').
        limit (int): maximum number of articles to return.

    Returns:
        dict: decoded JSON response on HTTP 200, otherwise an error string
        (kept as-is for backward compatibility with existing callers).
    """
    url = 'https://www.alphavantage.co/query?function=NEWS_SENTIMENT'
    params = {
        'apikey': alpha_api_key,
        'tickers': tickers,
        'topics': topics,
        'time_from': time_from,
        'time_to': time_to,
        'sort': sort,
        'limit': limit,
    }
    # Fix: a timeout keeps the call from hanging indefinitely on a stalled
    # connection; requests itself drops params whose value is None.
    response = requests.get(url, params=params, timeout=30)
    if response.status_code == 200:
        return response.json()  # Return the JSON data as a dictionary
    return f"Error fetching data: {response.status_code}"
def alpha_get_top_gainers_losers():
    """Fetch Alpha Vantage's TOP_GAINERS_LOSERS snapshot.

    Returns:
        dict: decoded JSON response (on an API error Alpha Vantage returns a
        JSON body describing the error, which is passed through unchanged).
    """
    url = 'https://www.alphavantage.co/query?function=TOP_GAINERS_LOSERS'
    params = {'apikey': alpha_api_key}
    # Fix: add a timeout so a stalled connection cannot block forever.
    response = requests.get(url, params=params, timeout=30)
    return response.json()
def calculate_time_weight(time_published, current_time, max_days=30):
    """Linearly decaying recency weight for a news item.

    Parameters:
        time_published (str): timestamp formatted ``%Y%m%dT%H%M%S``.
        current_time (datetime): reference "now".
        max_days (int): age at which the weight reaches zero.

    Returns:
        float/int: 1.0 for brand-new items, decaying to 0 at ``max_days``;
        0 when the timestamp cannot be parsed.
    """
    try:
        published = datetime.strptime(time_published, '%Y%m%dT%H%M%S')
    except ValueError:
        # Malformed timestamp: the item contributes nothing.
        return 0
    age_days = (current_time - published).days
    remaining = max_days - age_days
    return remaining / max_days if remaining > 0 else 0
def alpha_extract_and_calculate_sentiment(ticker, response_dict):
    """Aggregate a relevance- and recency-weighted sentiment for *ticker*.

    Parameters:
        ticker (str): symbol to extract from each article's ticker_sentiment.
        response_dict (dict): NEWS_SENTIMENT payload with a 'feed' list.

    Returns:
        dict: {'overall_sentiment_score', 'overall_sentiment_label'} when at
        least one relevant article is found, otherwise an explanatory string.
    """
    now = datetime.utcnow()
    weighted_sum = 0
    weight_sum = 0
    for article in response_dict.get('feed', []):
        recency = calculate_time_weight(article.get('time_published', ''), now)
        for entry in article.get("ticker_sentiment", []):
            if entry["ticker"] != ticker:
                continue
            score = float(entry["ticker_sentiment_score"])
            relevance = float(entry["relevance_score"])
            weight = relevance * recency
            weighted_sum += score * weight
            weight_sum += weight
    if weight_sum == 0:
        return "No relevant news found for the ticker."
    overall_score = weighted_sum / weight_sum
    # Label thresholds mirror Alpha Vantage's published sentiment buckets.
    if overall_score <= -0.35:
        sentiment_label = "Bearish"
    elif overall_score <= -0.15:
        sentiment_label = "Somewhat-Bearish"
    elif overall_score >= 0.35:
        sentiment_label = "Bullish"
    elif overall_score >= 0.15:
        sentiment_label = "Somewhat-Bullish"
    else:
        sentiment_label = "Neutral"
    return {"overall_sentiment_score": overall_score, "overall_sentiment_label": sentiment_label}
def weighted_reddit_sentiment_analysis(subreddit_name, ticker, time_frame_days=30, post_limit=30, comment_limit=20, min_upvotes=10):
    """Weighted TextBlob sentiment for *ticker* in one subreddit.

    Searches the subreddit for the ticker, scores post titles and top-level
    comment bodies with TextBlob polarity, and weights each score by
    engagement (upvotes, comment count), recency (age in hours) and author
    karma.

    Parameters:
        subreddit_name (str): subreddit to search.
        ticker (str): search query / stock symbol.
        time_frame_days (int): only posts newer than this many days count.
        post_limit (int): maximum posts fetched from the search.
        comment_limit (int): maximum comments scored per post.
        min_upvotes (int): posts below this score are skipped.

    Returns:
        float: weighted average polarity in roughly [-1, 1]; 0 when nothing
        qualified.
    """
    client_id, client_secret, user_agent = reddit_load_api_key()
    reddit = praw.Reddit(client_id=client_id, client_secret=client_secret, user_agent=user_agent)
    subreddit = reddit.subreddit(subreddit_name)
    end_date = datetime.utcnow()
    start_date = end_date - timedelta(days=time_frame_days)
    total_weighted_sentiment = 0
    total_weight = 0
    for post in subreddit.search(f"{ticker}", sort='new', time_filter='all', limit=post_limit):
        post_date = datetime.utcfromtimestamp(post.created_utc)
        if start_date <= post_date <= end_date and post.score >= min_upvotes:
            # NOTE(review): counted before replace_more(), so this reflects the
            # initially loaded comment forest, not the full comment count.
            post_comments = len(post.comments)
            post_age = (end_date - post_date).total_seconds() / 3600  # in hours
            # Attempt to retrieve the user's karma
            try:
                post_karma = sum(post.author.karma().values()) if post.author else 0
            except AttributeError:
                post_karma = 0
            # Post engagement and user reputation weight:
            # more upvotes/comments and higher karma raise the weight,
            # older posts (larger age) lower it; +1 guards against zero.
            post_weight = (post.score * post_comments) / (post_age + 1) * (post_karma + 1)
            post_analysis = TextBlob(post.title)
            post_sentiment_score = post_analysis.sentiment.polarity
            total_weighted_sentiment += post_sentiment_score * post_weight
            total_weight += post_weight
            # Analyze top comments (limit=0 drops "load more" placeholders
            # without fetching additional pages)
            post.comments.replace_more(limit=0)
            for comment in post.comments[:comment_limit]:
                comment_age = (end_date - datetime.utcfromtimestamp(comment.created_utc)).total_seconds() / 3600  # in hours
                # Attempt to retrieve the user's karma
                try:
                    comment_karma = sum(comment.author.karma().values()) if comment.author else 0
                except AttributeError:
                    comment_karma = 0
                # Comment engagement and user reputation weight
                comment_weight = (comment.score / (comment_age + 1)) * (comment_karma + 1)
                comment_analysis = TextBlob(comment.body)
                comment_sentiment_score = comment_analysis.sentiment.polarity
                total_weighted_sentiment += comment_sentiment_score * comment_weight
                total_weight += comment_weight
    # Guard against a zero denominator when nothing matched the filters.
    average_weighted_sentiment = total_weighted_sentiment / total_weight if total_weight > 0 else 0
    return average_weighted_sentiment
def aggregate_subreddit_sentiment(subreddits, ticker):
    """Average the weighted Reddit sentiment for *ticker* across subreddits.

    Parameters:
        subreddits (list[str]): subreddit names to poll.
        ticker (str): stock symbol to search for.

    Returns:
        dict: {'overall_sentiment_score', 'overall_sentiment_label'}; the
        score is 0 (Neutral) when the subreddit list is empty.
    """
    if subreddits:
        scores = [weighted_reddit_sentiment_analysis(name, ticker) for name in subreddits]
        overall_sentiment = sum(scores) / len(subreddits)
    else:
        overall_sentiment = 0
    # Same bucket thresholds as the Alpha Vantage news sentiment labels.
    if overall_sentiment <= -0.35:
        sentiment_label = "Bearish"
    elif overall_sentiment <= -0.15:
        sentiment_label = "Somewhat-Bearish"
    elif overall_sentiment >= 0.35:
        sentiment_label = "Bullish"
    elif overall_sentiment >= 0.15:
        sentiment_label = "Somewhat-Bullish"
    else:
        sentiment_label = "Neutral"
    return {"overall_sentiment_score": overall_sentiment, "overall_sentiment_label": sentiment_label}
def get_intraday_stock_data(symbol):
    """Download full-size 5-minute intraday bars for *symbol*.

    Returns:
        pandas.DataFrame: OHLCV frame with columns renamed to
        ['Open', 'High', 'Low', 'Close', 'Volume'].
    """
    client = TimeSeries(key=alpha_api_key, output_format='pandas')
    frame, _ = client.get_intraday(symbol=symbol, interval='5min', outputsize='full')
    # Replace Alpha Vantage's numbered column names with plain OHLCV labels.
    frame.columns = ['Open', 'High', 'Low', 'Close', 'Volume']
    return frame
def time_aggregated_block_trades(data, time_window='1min', block_size=10000):
    """Find the largest block trades after resampling to *time_window*.

    Parameters:
        data (pandas.DataFrame): OHLCV frame with a DatetimeIndex.
        time_window (str): pandas resample rule (default one minute).
        block_size (int): minimum aggregated Volume to count as a block trade.

    Returns:
        pandas.DataFrame: up to 10 largest-volume windows, with a
        'Price Impact' column (Close - Open per window).
    """
    # Resampling data to the desired time frame and summing volumes
    aggregated_data = data.resample(time_window).agg({'Open': 'first',
                                                      'Close': 'last',
                                                      'Volume': 'sum'})
    # Fix: .copy() so the 'Price Impact' assignment writes to an independent
    # frame rather than a view of aggregated_data (pandas chained-assignment
    # hazard / SettingWithCopyWarning).
    block_trades = aggregated_data[aggregated_data['Volume'] >= block_size].copy()
    block_trades['Price Impact'] = block_trades['Close'] - block_trades['Open']
    # Sorting by Volume and getting the top 10 largest trades
    return block_trades.nlargest(10, 'Volume')
def detect_volume_anomalies(data, std_factor=3):
    """Rows whose Volume deviates more than *std_factor* std-devs from the mean.

    Parameters:
        data (pandas.DataFrame): frame containing a 'Volume' column.
        std_factor (int): width of the acceptance band in standard deviations.

    Returns:
        pandas.DataFrame: up to 10 highest-volume rows outside the band.
    """
    volume = data['Volume']
    center = volume.mean()
    spread = volume.std()
    upper_bound = center + std_factor * spread
    lower_bound = center - std_factor * spread
    outliers = data[(volume > upper_bound) | (volume < lower_bound)]
    # Keep only the ten largest anomalies by traded volume.
    return outliers.nlargest(10, 'Volume')
def highlight_key_info(data):
    """Project the frame down to its Open, Close and Volume columns."""
    key_columns = ['Open', 'Close', 'Volume']
    return data[key_columns]
def weighted_volume_sentiment_analysis(data):
    """Classify overall direction from volume-and-price-impact weighted bars.

    Each row votes +1 (Close > Open) or -1, weighted by Volume times the
    absolute price move.

    Parameters:
        data (pandas.DataFrame): frame with 'Open', 'Close', 'Volume' columns.

    Returns:
        str: "Bullish", "Bearish", or "Neutral".
    """
    # Fix: compute weights/votes locally instead of writing 'Weight' and
    # 'Sentiment' columns into the caller's DataFrame (unwanted mutation).
    weights = data['Volume'] * (data['Close'] - data['Open']).abs()
    votes = data.apply(lambda row: 1 if row['Close'] > row['Open'] else -1, axis=1)
    total_weight = weights.sum()
    # Fix: previously a ZeroDivisionError when no bar moved (all weights 0);
    # no price movement carries no directional signal.
    if total_weight == 0:
        return "Neutral"
    weighted_sentiment = (weights * votes).sum() / total_weight
    # Categorizing the sentiment
    if weighted_sentiment > 0:
        return "Bullish"
    elif weighted_sentiment < 0:
        return "Bearish"
    else:
        return "Neutral"
def calculate_net_institutional_trading(block_trades, date, ticker, net_institutional_trading):
    """
    Calculates and stores the net institutional trading for a given ticker on a specific date.

    Parameters:
        block_trades (DataFrame): DataFrame containing block trade data
            (DatetimeIndex with 'Open', 'Close', 'Volume' columns).
        date (datetime.date): The date for which to calculate net institutional trading.
        ticker (str): Ticker symbol of the stock.
        net_institutional_trading (dict): Dictionary to store the net institutional trading values.

    Returns:
        float: Net institutional trading value for the given date
            (sum of (Close - Open) * Volume over that date's rows).
    """
    # Filter the block trades for the given date.
    # Fix: .copy() so the derived columns below are written to an independent
    # frame, not a view of block_trades (pandas chained-assignment hazard).
    filtered_trades = block_trades[block_trades.index.date == date].copy()
    # Net price move per window, scaled by volume as a proxy for money flow
    filtered_trades['Net'] = filtered_trades['Close'] - filtered_trades['Open']
    filtered_trades['Institutional Trading'] = filtered_trades['Net'] * filtered_trades['Volume']
    # Calculate the net institutional trading for the day
    net_value = filtered_trades['Institutional Trading'].sum()
    # Store the net value; setdefault covers both a first-seen ticker (new
    # list) and an existing entry (append), matching the old if/else.
    net_institutional_trading.setdefault(ticker, []).append((date, net_value))
    return net_value
def visualize_net_institutional_trading_today():
    """Bar-chart the most recent net institutional trading value per ticker.

    Reads the module-level ``net_institutional_trading`` store and ``symbols``
    list; green bars are net buying, red bars net selling. Each ticker must
    already have at least one recorded entry (via
    ``calculate_net_institutional_trading``) or the [-1] lookup raises.
    """
    # Create a bar graph of the net institutional trading for each ticker for today
    # NOTE(review): "[-1]" takes the last recorded entry per ticker, which is
    # "today" only if the store was populated with today's data — confirm.
    tickers, trading = zip(*[(ticker, net_institutional_trading[ticker][-1][1]) for ticker in symbols])
    colors = ['g' if x > 0 else 'r' for x in trading]
    plt.bar(tickers, trading, color=colors)
    plt.xlabel('Ticker')
    plt.ylabel('Net Institutional Trading')
    plt.title('Net Institutional Trading for Today')
    plt.show()
def visualize_net_institutional_trading_5_days():
    """Bar-chart each ticker's net institutional trading over the most
    recent 5 recorded entries.

    Reads the module-level ``net_institutional_trading`` store and ``symbols``
    list; green bars are net buying, red bars net selling.
    """
    # Fix: previously the ENTIRE recorded history was summed, contradicting
    # the "Most Recent 5 Days" title; take only the last 5 (date, value)
    # entries per ticker. list() is needed because deques don't slice.
    tickers, trading = zip(*[
        (ticker, sum(value for _, value in list(net_institutional_trading[ticker])[-5:]))
        for ticker in symbols
    ])
    colors = ['g' if x > 0 else 'r' for x in trading]
    plt.bar(tickers, trading, color=colors)
    plt.xlabel('Ticker')
    plt.ylabel('Net Institutional Trading')
    plt.title('Net Institutional Trading for the Most Recent 5 Days')
    plt.show()
# Tickers tracked by the visualization helpers above.
symbols = ['SPY', 'MSFT', 'AAPL', 'AMZN', 'NVDA', 'GOOGL', 'META', 'GOOG', 'BRK-B', 'TSLA', 'UNH']
if __name__ == '__main__':
    # Manual smoke test: fetches live intraday data for AAPL and prints the
    # result types of the block-trade and volume-anomaly pipelines.
    subreddits = ['wallstreetbets', 'stocks']
    ticker = 'AAPL'
    #print(alpha_extract_and_calculate_sentiment(ticker, alpha_get_news_sentiment(ticker)))
    #print(aggregate_subreddit_sentiment(subreddits, ticker))
    print(type(time_aggregated_block_trades(get_intraday_stock_data(ticker))))
    print(type(detect_volume_anomalies(get_intraday_stock_data(ticker))))