Skip to content

Commit 8d0ab0e

Browse files
committed
working towards py3 compatibility
- fixed imports - print statements
1 parent 173fe9e commit 8d0ab0e

7 files changed

Lines changed: 176 additions & 171 deletions

File tree

.travis.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
language: python
22
python:
33
- "2.7"
4-
# - "3.4"
5-
# - "3.5"
4+
- "3.4"
5+
- "3.5"
66
# - "3.6"
77
#install:
88
# - pip install .

wp_parser/ChatFeatures.py

Lines changed: 83 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,28 @@
11
# -*- coding: utf-8 -*-
22
from __future__ import division
3-
import datelib
4-
import re
3+
from __future__ import absolute_import
4+
55
import operator
66

7-
class ChatFeatures():
7+
import re
8+
import datelib
89

10+
11+
class ChatFeatures:
912
def __init__(self):
10-
self.root_response_time = []
13+
self.root_response_time = []
1114
self.contact_response_time = []
12-
self.root_burst = []
13-
self.contact_burst = []
14-
self.initiations = {}
15-
self.weekday = {}
16-
self.shifts = {}
17-
self.patterns = {}
18-
self.proportions = {}
19-
self.most_used_words = {}
20-
21-
def compute_response_time_and_burst(self, list_of_messages, root_name, senders, initiation_thrs=(60*60*8), burst_thrs=3, response_thrs=(60*60*3)):
15+
self.root_burst = []
16+
self.contact_burst = []
17+
self.initiations = {}
18+
self.weekday = {}
19+
self.shifts = {}
20+
self.patterns = {}
21+
self.proportions = {}
22+
self.most_used_words = {}
23+
24+
def compute_response_time_and_burst(self, list_of_messages, root_name, senders, initiation_thrs=(60 * 60 * 8),
25+
burst_thrs=3, response_thrs=(60 * 60 * 3)):
2226
# perform the operations that are dependent on multiple messages
2327
# (response time, bursts)
2428
self.initiations = {}
@@ -27,30 +31,30 @@ def compute_response_time_and_burst(self, list_of_messages, root_name, senders,
2731
t0 = list_of_messages[0].datetime_obj
2832
burst_count = 1
2933
for index, message in enumerate(list_of_messages):
30-
#skip the first message since we are looking at differences; note this means we don't count first msg as init
34+
# skip the first message since we are looking at differences; note this means we don't count first msg as init
3135
if index == 0:
3236
continue
3337
t1 = message.datetime_obj
3438
dt = t1 - t0
3539
dt.total_seconds()
3640

3741
# print "sender %s delta %s" % ( message.sender, dt.total_seconds() )
38-
if (dt.total_seconds() > initiation_thrs):
42+
if dt.total_seconds() > initiation_thrs:
3943
self.initiations[message.sender] += 1
4044

4145
# is sender the same as the last message?
42-
if message.sender != list_of_messages[index-1].sender:
46+
if message.sender != list_of_messages[index - 1].sender:
4347
# sender changed, store the burst count and reset
44-
#print "sender changed: %s" % ( message.sender )
45-
#print "burst count: %s" % ( burst_count )
48+
# print "sender changed: %s" % ( message.sender )
49+
# print "burst count: %s" % ( burst_count )
4650

47-
#print("response time: %d\n" %(dt.total_seconds()) )
51+
# print("response time: %d\n" %(dt.total_seconds()) )
4852
# is sender the root?
4953
if message.sender == root_name:
5054
# store the burst count for the last sender, which is the
5155
# opposite of current
5256
if burst_count > burst_thrs:
53-
#print "BURST CONTACT ENDED: %s IN A ROW" % ( burst_count )
57+
# print "BURST CONTACT ENDED: %s IN A ROW" % ( burst_count )
5458
self.contact_burst.append(burst_count)
5559
if dt.total_seconds() < response_thrs:
5660
self.root_response_time.append(dt.total_seconds())
@@ -59,24 +63,24 @@ def compute_response_time_and_burst(self, list_of_messages, root_name, senders,
5963
# store the burst count for the last sender, which is the
6064
# opposite of current
6165
if burst_count > burst_thrs:
62-
#print "BURST ROOT ENDED: %s IN A ROW" % ( burst_count )
66+
# print "BURST ROOT ENDED: %s IN A ROW" % ( burst_count )
6367
self.root_burst.append(burst_count)
6468
if dt.total_seconds() < response_thrs:
6569
self.contact_response_time.append(dt.total_seconds())
66-
70+
6771
# End of the first burst, restart the counter
6872
burst_count = 1
6973

7074
else:
7175
# accumulate the number of messages sent in a row
7276
burst_count += 1
7377
t0 = t1
74-
if burst_count > burst_thrs: #catch a burst if at end of chat
75-
#print "final burst: %s" % ( burst_count )
76-
if message.sender == root_name:
78+
if burst_count > burst_thrs: # catch a burst if at end of chat
79+
# print "final burst: %s" % ( burst_count )
80+
if message.sender == root_name:
7781
self.root_burst.append(burst_count)
7882
else:
79-
self.contact_burst.append(burst_count)
83+
self.contact_burst.append(burst_count)
8084

8185
def compute_messages_per_weekday(self, list_of_messages):
8286
self.weekday = {
@@ -105,16 +109,16 @@ def compute_messages_per_shift(self, list_of_messages):
105109
}
106110
for msg in list_of_messages:
107111
hour = int(msg.time.split(":")[0])
108-
if hour >= 0 and hour <= 6:
112+
if 0 <= hour <= 6:
109113
self.shifts["latenight"] += 1
110114

111-
elif hour > 6 and hour <= 11:
115+
elif 6 < hour <= 11:
112116
self.shifts["morning"] += 1
113117

114-
elif hour > 11 and hour <= 17:
118+
elif 11 < hour <= 17:
115119
self.shifts["afternoon"] += 1
116120

117-
elif hour > 17 and hour <= 23:
121+
elif 17 < hour <= 23:
118122
self.shifts["evening"] += 1
119123
return self.shifts
120124

@@ -134,7 +138,7 @@ def compute_messages_pattern(self, list_of_messages, senders, pattern_list):
134138
if length > 0:
135139
if pattern not in self.patterns:
136140
self.patterns[pattern][msg.sender] = length
137-
print "This should never happen"
141+
print("This should never happen")
138142
else:
139143
self.patterns[pattern][msg.sender] += length
140144
return self.patterns
@@ -149,10 +153,10 @@ def compute_message_proportions(self, list_of_messages, senders, root, contact):
149153
self.proportions[i][s] = 0
150154
for msg in list_of_messages:
151155
self.proportions["messages"][msg.sender] += 1
152-
self.proportions["words"][msg.sender] += len(msg.content.split(" "))
153-
self.proportions["chars"][msg.sender] += len(msg.content.strip())
154-
self.proportions["qmarks"][msg.sender] += msg.content.count('?')
155-
self.proportions["exclams"][msg.sender] += msg.content.count('!')
156+
self.proportions["words"][msg.sender] += len(msg.content.split(" "))
157+
self.proportions["chars"][msg.sender] += len(msg.content.strip())
158+
self.proportions["qmarks"][msg.sender] += msg.content.count('?')
159+
self.proportions["exclams"][msg.sender] += msg.content.count('!')
156160
self.proportions["media"][msg.sender] += (
157161
msg.content.count('<media omitted>') +
158162
msg.content.count('<image omitted>') +
@@ -170,24 +174,24 @@ def compute_message_proportions(self, list_of_messages, senders, root, contact):
170174
self.proportions["avg_words"] = {}
171175
for s in senders:
172176
self.proportions["avg_words"][s] = self.proportions["words"][s] / self.proportions["messages"][s]
173-
self.proportions["avg_words"]["ratio"] = self.proportions["avg_words"][root] / self.proportions["avg_words"][contact]
177+
self.proportions["avg_words"]["ratio"] = self.proportions["avg_words"][root] / self.proportions["avg_words"][
178+
contact]
174179

175180
for c in categories:
176181
self.proportions[c]["total"] = 0
177182
for s in senders:
178183
self.proportions[c]["total"] += self.proportions[c][s]
179-
184+
180185
for c in categories:
181-
182-
#if a value is 0, replace with a 1 to avoid zero erros in ratio calcs.
186+
187+
# if a value is 0, replace with a 1 to avoid zero errors in ratio calcs.
183188
if self.proportions[c][contact] == 0:
184189
self.proportions[c][contact] = 1
185190
if self.proportions[c][root] == 0:
186-
self.proportions[c][root] = 1
191+
self.proportions[c][root] = 1
187192

188193
self.proportions[c]["ratio"] = self.proportions[c][root] / self.proportions[c][contact]
189194

190-
191195
return self.proportions
192196

193197
def compute_most_used_words(self, list_of_messages, top=10, threshold=3):
@@ -204,37 +208,37 @@ def compute_most_used_words(self, list_of_messages, top=10, threshold=3):
204208
words_counter[w] = 1
205209
else:
206210
words_counter[w] += 1
207-
sorted_words = sorted(words_counter.iteritems(), key=operator.itemgetter(1), reverse=True)
211+
sorted_words = sorted(words_counter.items(), key=operator.itemgetter(1), reverse=True)
208212
self.most_used_words = sorted_words[:top]
209213
return self.most_used_words
210214

211215
def compute_avg_root_response_time(self):
212-
if (len(self.root_response_time) != 0):
213-
return sum(self.root_response_time)/len(self.root_response_time)
216+
if len(self.root_response_time) != 0:
217+
return sum(self.root_response_time) / len(self.root_response_time)
214218
return 0
215219

216220
def compute_avg_contact_response_time(self):
217-
if (len(self.contact_response_time) != 0):
218-
return sum(self.contact_response_time)/len(self.contact_response_time)
221+
if len(self.contact_response_time) != 0:
222+
return sum(self.contact_response_time) / len(self.contact_response_time)
219223
return 0
220224

221225
def compute_response_time_ratio(self, root, contact):
222226
avg_root = self.compute_avg_root_response_time()
223227
avg_contact = self.compute_avg_contact_response_time()
224-
if (avg_contact != 0):
228+
if avg_contact != 0:
225229
return avg_root / avg_contact
226230
return 0
227231

228232
def compute_bursts_ratio(self, root, contact):
229233
if (len(self.contact_burst)) == 0:
230234
return len(self.root_burst) / 1
231-
if (len(self.root_burst) == 0):
232-
return ( 1/len(self.contact_burst))
233-
return len(self.root_burst)/len(self.contact_burst)
235+
if len(self.root_burst) == 0:
236+
return 1 / len(self.contact_burst)
237+
return len(self.root_burst) / len(self.contact_burst)
234238

235239
def compute_nbr_root_burst(self):
236240
return len(self.root_burst)
237-
241+
238242
def compute_nbr_contact_burst(self):
239243
return len(self.contact_burst)
240244

@@ -244,48 +248,41 @@ def compute_nbr_contact_burst(self):
244248
# return 0
245249

246250
def compute_avg_contact_burst(self):
247-
if (len(self.contact_burst) != 0):
248-
return sum(self.contact_burst)/len(self.contact_burst)
251+
if len(self.contact_burst) != 0:
252+
return sum(self.contact_burst) / len(self.contact_burst)
249253
return 0
250254

251255
def compute_root_initation_ratio(self, root, contact):
252-
if (self.initiations[contact] == 0):
253-
return self.initiations[root]/1
254-
if (self.initiations[root] == 0):
255-
return 1/self.initiations[contact]
256+
if self.initiations[contact] == 0:
257+
return self.initiations[root] / 1
258+
if self.initiations[root] == 0:
259+
return 1 / self.initiations[contact]
256260
return self.initiations[root] / self.initiations[contact]
257-
261+
258262
def generate_outcome(self, root, contact, methodology):
259-
outcome = 99;
263+
outcome = 99
260264
if methodology == 0:
261-
if (self.compute_root_initation_ratio(root, contact) > 0.867):
262-
outcome = 0 #"just not that into you"
263-
#print "DOESNT INITIATE"
264-
elif (self.proportions["qmarks"]["ratio"] > 0.87): #flipped the non-intutitive direction of inequality
265-
outcome = 0 #"just not that into you"
266-
#print "QUESTIONS FAIL"
265+
if self.compute_root_initation_ratio(root, contact) > 0.867:
266+
outcome = 0 # "just not that into you"
267+
# print "DOESNT INITIATE"
268+
elif self.proportions["qmarks"]["ratio"] > 0.87: # flipped the non-intuitive direction of inequality
269+
outcome = 0 # "just not that into you"
270+
# print "QUESTIONS FAIL"
267271
else:
268-
outcome = 1 #"definitely into you"
269-
#print "ELSE"
272+
outcome = 1 # "definitely into you"
273+
# print "ELSE"
270274
elif methodology == 1:
271-
if (self.compute_root_initation_ratio(root, contact) > 0.83):
272-
outcome = 0 #"just not that into you"
273-
#print "DOESNT INITIATE"
274-
elif (self.features.compute_avg_root_response_time() < 0.92): #flipped non-intuitive direction of inequality
275-
outcome = 0 #"just not that into you"
276-
#print "QUESTIONS FAIL"
275+
if self.compute_root_initation_ratio(root, contact) > 0.83:
276+
outcome = 0 # "just not that into you"
277+
# print "DOESNT INITIATE"
278+
elif self.features.compute_avg_root_response_time() < 0.92: # flipped non-intuitive direction of inequality
279+
outcome = 0 # "just not that into you"
280+
# print "QUESTIONS FAIL"
277281
else:
278-
outcome = 1 #"definitely into you"
279-
#print "ELSE"
282+
outcome = 1 # "definitely into you"
283+
# print "ELSE"
280284

281285
else:
282-
outcome = 99;
283-
284-
return outcome
285-
286-
# qMarksPerRoot = qmarksRoot/messagesRoot
287-
# qMarksPerContact = qmarksContact/messagesContact
288-
289-
290-
291-
286+
outcome = 99
287+
288+
return outcome

wp_parser/datelib.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1+
import time
12
from datetime import date
23
from datetime import datetime
34
from datetime import timedelta
4-
import time
55

66

77
# get current ymd
@@ -37,11 +37,13 @@ def valid_date(date_str):
3737

3838
return valid
3939

40+
4041
def date_diff(dateobj1, dateobj2):
4142
import math
4243
delta = dateobj2 - dateobj1
4344
return int(math.fabs(delta.days))
4445

46+
4547
def datecmp(date1, date2):
4648
year, month, day = date_split(date1)
4749
year_t, month_t, day_t = date_split(date2)
@@ -53,8 +55,8 @@ def datecmp(date1, date2):
5355
else:
5456
return 1
5557
except ValueError:
56-
#misc.error("Fix me! Invalid date", "datecmp")
57-
print "Fix me! Invalid date"
58+
# misc.error("Fix me! Invalid date", "datecmp")
59+
print("Fix me! Invalid date")
5860
return False
5961

6062

@@ -65,7 +67,7 @@ def date_operation(date_str, num):
6567
return end_date
6668

6769

68-
def date_to_str(date_str):
70+
def date_to_str():
6971
return date.strftime('%Y-%m-%d')
7072

7173

@@ -89,7 +91,7 @@ def date_interval(initial_date, length, step=1, separator="-"):
8991
output = []
9092
current = start_date
9193
while current < end_date:
92-
output.append(date_to_str(current))
94+
output.append(date_to_str())
9395
current += timedelta(days=step)
9496

9597
return output
@@ -119,5 +121,6 @@ def weekday_portuguese_to_english(string):
119121
elif string == "sab" or string == "sabado":
120122
return "Saturday"
121123

124+
122125
if __name__ == "__main__":
123-
print date_diff(datetime(2015, 6, 4), datetime(2015, 07, 7))
126+
print(date_diff(datetime(2015, 6, 4), datetime(2015, 7, 7)))

0 commit comments

Comments
 (0)