"""Compare the vocabulary and sentiment of two Wikipedia articles.

The helpers below are pure functions over token lists and files, so they can
be imported and tested without network access.  The Wikipedia download,
NLTK tokenisation and VADER sentiment scoring only happen in ``main()``.
"""

import pickle as _stdlib_pickle  # aliased: this module defines its own ``pickle()``
import string
from collections import Counter


def _unique(tokens):
    """Return the items of *tokens* in first-seen order without repeats."""
    seen = set()
    unique = []
    for token in tokens:
        if token not in seen:
            seen.add(token)
            unique.append(token)
    return unique


def no_repeat(list1, list2):
    """De-duplicate two token lists independently.

    Args:
        list1: first sequence of hashable tokens.
        list2: second sequence of hashable tokens.

    Returns:
        A tuple ``(unique1, unique2)`` of new lists that keep the first
        occurrence of every token in its original order.  The inputs are not
        modified and no module-level state is touched, so repeated calls do
        not accumulate results.
    """
    return _unique(list1), _unique(list2)


def similarity(concatlist1, concatlist2):
    """Return the tokens of *concatlist1* that also occur in *concatlist2*.

    Args:
        concatlist1: sequence of hashable tokens; its order is preserved.
        concatlist2: sequence of hashable tokens to test membership against.

    Returns:
        A new list with every item of *concatlist1* found in *concatlist2*.
    """
    # Build the lookup set once so each membership test is O(1).
    common = set(concatlist2)
    return [token for token in concatlist1 if token in common]


def pickle(list1, list2, path1='list1.pickle', path2='list2.pickle'):
    """Serialise both lists to disk with the standard ``pickle`` module.

    Args:
        list1: object written to *path1*.
        list2: object written to *path2*.
        path1: destination file for *list1*.
        path2: destination file for *list2*.

    Returns:
        The tuple ``(path1, path2)`` of the files that were written.
    """
    for path, payload in ((path1, list1), (path2, list2)):
        # ``with`` guarantees the handle is closed even if dumping fails.
        with open(path, 'wb') as handle:
            _stdlib_pickle.dump(payload, handle)
    return path1, path2


def process_file(f1, f2):
    """Build a histogram of the lines of the text file *f1*.

    Args:
        f1: path of a text file holding one item per line.
        f2: accepted for interface compatibility; it is not read.
            NOTE(review): the original never used it either -- confirm
            whether a second histogram was intended.

    Returns:
        A dict mapping each line (without its trailing newline) to the number
        of times it occurs in *f1*.
    """
    hist = dict()
    with open(f1) as fp1:
        for line in fp1:
            # Drop the terminator so "word\n" and a final "word" share a key.
            item = line.rstrip('\n')
            hist[item] = hist.get(item, 0) + 1
    return hist


def counter(concatlist1, concatlist2):
    """Count how often each token occurs in each list.

    Args:
        concatlist1: first iterable of hashable tokens.
        concatlist2: second iterable of hashable tokens.

    Returns:
        A tuple ``(counts1, counts2)`` of ``collections.Counter`` objects.
    """
    return Counter(concatlist1), Counter(concatlist2)


def main():
    """Fetch both articles, compare their vocabularies and score sentiment."""
    # Third-party imports are local so the helpers above stay importable
    # (and testable) on machines without these packages or network access.
    import nltk
    import wikipedia
    from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

    page1 = wikipedia.page('Olin College')
    page2 = wikipedia.page('Stanford University')
    list1 = nltk.word_tokenize(page1.content)
    list2 = nltk.word_tokenize(page2.content)

    concatlist1, concatlist2 = no_repeat(list1, list2)
    compiled = similarity(concatlist1, concatlist2)
    print(len(compiled))

    # polarity_scores() is given the article text as a string, not a list.
    analyzer = SentimentIntensityAnalyzer()
    print(analyzer.polarity_scores(page1.content))
    print(analyzer.polarity_scores(page2.content))


if __name__ == '__main__':
    main()