# sentiment_analysis.py — SENTISCOPE (kingsman1960/SENTISCOPE, main branch)
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from nltk.sentiment import SentimentIntensityAnalyzer
import nltk
from flair.nn import Classifier
from flair.data import Sentence

# Fetch the VADER lexicon only when it is not already installed locally, so
# that importing this module does not invoke the NLTK downloader every time.
try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    nltk.download('vader_lexicon')

class SentimentAnalyzer:
    """Score text with several pretrained sentiment models.

    The constructor loads three Hugging Face sequence-classification
    checkpoints (FinBERT, ESG-BERT, FinBERT-tone) and the Flair
    ``en-sentiment`` classifier. Each ``analyze_sentiment_*`` method takes a
    text and returns a plain ``dict`` of scores.
    """

    # Key order of the dictionaries returned by the transformer-based methods.
    _OUTPUT_LABELS = ('Negative', 'Neutral', 'Positive')

    # Class-index order published for each checkpoint. Used when the loaded
    # model's config does not carry a usable ``id2label`` mapping.
    _FINBERT_LABELS = ('Positive', 'Negative', 'Neutral')
    _FINBERT_TONE_LABELS = ('Neutral', 'Positive', 'Negative')

    def __init__(self):
        """Load every tokenizer/model pair and the Flair classifier."""
        self.finbert_tokenizer, self.finbert_model = self._load_finbert()
        self.esgbert_tokenizer, self.esgbert_model = self._load_esgbert()
        self.finbert_tone_tokenizer, self.finbert_tone_model = self._load_finbert_tone()
        self.flair_sentiment_model = Classifier.load('en-sentiment')

    @staticmethod
    def _load_pretrained(checkpoint):
        """Return ``(tokenizer, model)`` for a sequence-classification checkpoint."""
        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
        return tokenizer, model

    def _load_finbert(self):
        """Return the tokenizer and model for ``ProsusAI/finbert``."""
        return self._load_pretrained("ProsusAI/finbert")

    def _load_esgbert(self):
        """Return the tokenizer and model for ``nbroad/ESG-BERT``."""
        return self._load_pretrained("nbroad/ESG-BERT")

    def _load_finbert_tone(self):
        """Return the tokenizer and model for ``yiyanghkust/finbert-tone``."""
        return self._load_pretrained("yiyanghkust/finbert-tone")

    @staticmethod
    def _class_probabilities(tokenizer, model, text):
        """Return the softmax class probabilities of ``model`` for ``text``.

        The text is truncated to 512 tokens. The result is a list of floats,
        one per output class, in the model's own class-index order.
        """
        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
        with torch.no_grad():
            outputs = model(**inputs)
        probabilities = torch.nn.functional.softmax(outputs.logits, dim=1)
        return probabilities[0].tolist()

    @staticmethod
    def _sentiment_labels(model, fallback):
        """Return the label of each class index, capitalised.

        Labels are read from ``model.config.id2label``. If that mapping is
        missing, malformed, or does not consist of exactly the sentiment names
        in ``fallback`` (e.g. generic ``LABEL_0`` names), ``fallback`` is
        returned instead.
        """
        id2label = getattr(getattr(model, 'config', None), 'id2label', None) or {}
        try:
            by_index = {int(idx): str(name).capitalize() for idx, name in id2label.items()}
        except (AttributeError, TypeError, ValueError):
            return list(fallback)
        names = [by_index.get(i) for i in range(len(fallback))]
        if len(by_index) == len(fallback) and set(names) == set(fallback):
            return names
        return list(fallback)

    def _sentiment_scores(self, tokenizer, model, text, fallback_labels):
        """Return ``{'Negative': p, 'Neutral': p, 'Positive': p}`` for ``text``.

        Each probability is paired with the label of its class index, then the
        result is emitted in the fixed ``_OUTPUT_LABELS`` key order.
        """
        scores = self._class_probabilities(tokenizer, model, text)
        by_label = dict(zip(self._sentiment_labels(model, fallback_labels), scores))
        return {label: by_label[label] for label in self._OUTPUT_LABELS}

    def analyze_sentiment_finbert(self, text):
        """Return FinBERT probabilities keyed 'Negative', 'Neutral', 'Positive'.

        Scores are matched to labels through the model's class-index mapping
        rather than an assumed negative/neutral/positive order.
        """
        return self._sentiment_scores(
            self.finbert_tokenizer, self.finbert_model, text, self._FINBERT_LABELS
        )

    def analyze_sentiment_vader(self, text):
        """Return VADER polarity scores for ``text``.

        The analyzer is created on first use and cached on the instance, so
        the lexicon is not reloaded on every call.
        """
        analyzer = getattr(self, '_vader_analyzer', None)
        if analyzer is None:
            analyzer = self._vader_analyzer = SentimentIntensityAnalyzer()
        return analyzer.polarity_scores(text)

    def analyze_sentiment_esgbert(self, text):
        """Return the first three ESG-BERT class probabilities.

        The values are keyed 'Negative', 'Neutral', 'Positive' in class-index
        order, exactly as before, so existing callers keep working.

        NOTE(review): nbroad/ESG-BERT appears to be an ESG topic classifier
        with more than three classes rather than a sentiment model. If so,
        ``zip`` keeps only the first three class probabilities, they need not
        sum to 1, and the sentiment keys do not describe them. Confirm against
        the model card before relying on this output.
        """
        scores = self._class_probabilities(self.esgbert_tokenizer, self.esgbert_model, text)
        return dict(zip(self._OUTPUT_LABELS, scores))

    def analyze_sentiment_finbert_tone(self, text):
        """Return FinBERT-tone probabilities keyed 'Negative', 'Neutral', 'Positive'.

        Scores are matched to labels through the model's class-index mapping
        rather than an assumed negative/neutral/positive order.
        """
        return self._sentiment_scores(
            self.finbert_tone_tokenizer, self.finbert_tone_model, text, self._FINBERT_TONE_LABELS
        )

    def analyze_sentiment_flair(self, text):
        """Return Flair's first predicted label as ``{'sentiment', 'score'}``."""
        sentence = Sentence(text)
        self.flair_sentiment_model.predict(sentence)
        top_label = sentence.labels[0]
        return {'sentiment': top_label.value, 'score': top_label.score}