-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathaddTweetsToTraining.py
More file actions
66 lines (56 loc) · 2 KB
/
addTweetsToTraining.py
File metadata and controls
66 lines (56 loc) · 2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#Patrick Wilson
#EECS 498 HW 3
#!/usr/bin/env python
import re
import sys
import os
import operator
import math
import cProfile
import numpy
from sentstotweets import sentstotweets
# Every single character removePunc deletes, expressed as one character class.
# Backslash is deliberately NOT included: the original code had that
# substitution commented out, so backslashes are left in the text.
_PUNCTUATION_RE = re.compile(r"""[@!&+?'$()#*/.,=\-_:;"\[\]]""")


def removePunc(input):
    """Return *input* with punctuation and the literal text ``http`` removed.

    The following characters are deleted wherever they occur::

        @ ! & + ? ' $ ( ) # * / . , = - _ : ; " [ ]

    After that, every occurrence of the substring ``http`` is deleted.
    Whitespace, letters, digits and backslashes are left untouched.

    Note that a ``u`` preceding a quote (as in the repr of a unicode string,
    ``u'text'``) is kept: only the quote itself is stripped. The earlier
    implementation had a ``u'`` substitution, but it ran after all
    apostrophes were already gone and therefore never matched.

    Args:
        input: The text to clean. (The name shadows the builtin but is kept
            so that keyword callers continue to work.)

    Returns:
        The cleaned string.
    """
    # One pass over the text instead of one regex pass per character.
    stripped = _PUNCTUATION_RE.sub("", input)
    # "http" is removed last so that fragments joined by the punctuation
    # removal above (e.g. "ht.tp") are caught as well.
    return stripped.replace("http", "")
# Script body: fetch tweets for each emotion query, strip punctuation, and
# append the cleaned text to a per-emotion training file under tweetlists/.

# One search query per emotion category. Each query string is used both as
# the key into the fetched results and as the stem of the output file name.
emotionlist = ['#love OR #attached OR #devotion', '#happy OR #elated',
               '#amused OR #excited OR #firedup',
               '#blessed OR #grateful', '#sad OR #depressed OR #heartbroken',
               '#angry OR #mad OR #infuriated', '#afraid OR #scared OR #terrified',
               '#humiliating OR #embarrassing OR #ashamed']

tweets = sentstotweets()
# NOTE(review): top50Tweets() is indexed below by the query strings above, so
# it presumably returns a mapping keyed by query - confirm against
# sentstotweets.
inputsteeeeez = tweets.top50Tweets()

# Make sure the output directory exists so the first open() cannot fail on a
# fresh checkout.
if not os.path.isdir('tweetlists'):
    os.makedirs('tweetlists')

for words in emotionlist:
    training = inputsteeeeez[words]  # training is list of tweets for each emotion
    # The whole list is stringified and cleaned in one go.
    readfile = removePunc(str(training))
    #readfile = removeStopWords(readfile)
    readfile = readfile.encode('utf-8').strip()
    # The data is already UTF-8 encoded, so append in binary mode; a
    # text-mode file rejects encoded bytes on Python 3. Append mode always
    # writes at the end of the file, so no seek is needed.
    with open('tweetlists/' + words + ".txt", "ab") as myfile:
        myfile.write(readfile)