diff --git a/README.md b/README.md
index 8cce527..a014e22 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,6 @@
 # TextMining
 
 This is the base repo for the text mining and analysis project for Software Design at Olin College.
+
+Changed Readme so that I can have a release branch.
+Accidentally committed to master, so please excuse me for that.
diff --git a/bbc.pickle b/bbc.pickle
new file mode 100644
index 0000000..4d9abe2
Binary files /dev/null and b/bbc.pickle differ
diff --git a/cnn.pickle b/cnn.pickle
new file mode 100644
index 0000000..f80eee4
Binary files /dev/null and b/cnn.pickle differ
diff --git a/dataAnalysis.py b/dataAnalysis.py
new file mode 100644
index 0000000..0382fab
--- /dev/null
+++ b/dataAnalysis.py
@@ -0,0 +1,40 @@
+"""Count Trump mentions and score sentiment for pickled news pages."""
+
+import pickle
+from bs4 import BeautifulSoup
+from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
+
+
+def analyzer(filename):
+    """Count Trump mentions and compute sentiment for one pickled page.
+
+    Args:
+        filename: Path to a pickle file expected to hold a page's HTML text.
+
+    Returns:
+        A ``(count, scores)`` tuple. ``count`` is the number of
+        whitespace-separated tokens equal to "trump" or "Trump";
+        ``scores`` is whatever ``polarity_scores`` returns for the
+        page's extracted text.
+    """
+    # NOTE: pickle.load can run arbitrary code; only load pickles that
+    # were produced locally, never files from an untrusted source.
+    # The context manager closes the file even if unpickling fails.
+    with open(filename, 'rb') as input_file:
+        html = pickle.load(input_file)
+
+    text = BeautifulSoup(html, "html.parser").get_text()
+    # Exact-token match: "Trump's" or "Trump," are not counted.
+    count = sum(1 for word in text.split() if word in ("trump", "Trump"))
+    scores = SentimentIntensityAnalyzer().polarity_scores(text)
+    return count, scores
+
+
+if __name__ == "__main__":
+    websites = ['bbc.pickle', 'cnn.pickle', 'foxnews.pickle']
+    for site in websites:
+        # One call per site: calling analyzer() once per field would
+        # reload, re-parse and re-score the same page.
+        count, scores = analyzer(site)
+        print("Number of times Trump is mentioned: " , count)
+        print("Positivity/Negativity score: ", scores)
diff --git a/foxnews.pickle b/foxnews.pickle
new file mode 100644
index 0000000..6745918
Binary files /dev/null and b/foxnews.pickle differ
diff --git a/sodezz.pdf b/sodezz.pdf
new file mode 100644
index 0000000..be6a3e5
Binary files /dev/null and b/sodezz.pdf differ
diff --git a/textmining.py b/textmining.py
new file mode 100644
index 0000000..755e839
--- /dev/null
+++ b/textmining.py
@@ -0,0 +1,11 @@
+"""Download the page at http://cnn.com and pickle its text to cnn.pickle."""
+
+import requests
+import pickle
+
+website = 'http://cnn.com'
+# A timeout keeps the script from hanging forever on a stalled connection.
+sampleText = requests.get(website, timeout=30).text
+# The context manager closes the file even if pickling raises.
+with open('cnn.pickle', 'wb') as f:
+    pickle.dump(sampleText, f)