11# -*- coding: utf-8 -*-
22from __future__ import division
3- import datelib
4- import re
3+ from __future__ import absolute_import
4+
55import operator
66
7- class ChatFeatures ():
7+ import re
8+ import datelib
89
10+
11+ class ChatFeatures :
912 def __init__ (self ):
10- self .root_response_time = []
13+ self .root_response_time = []
1114 self .contact_response_time = []
12- self .root_burst = []
13- self .contact_burst = []
14- self .initiations = {}
15- self .weekday = {}
16- self .shifts = {}
17- self .patterns = {}
18- self .proportions = {}
19- self .most_used_words = {}
20-
21- def compute_response_time_and_burst (self , list_of_messages , root_name , senders , initiation_thrs = (60 * 60 * 8 ), burst_thrs = 3 , response_thrs = (60 * 60 * 3 )):
15+ self .root_burst = []
16+ self .contact_burst = []
17+ self .initiations = {}
18+ self .weekday = {}
19+ self .shifts = {}
20+ self .patterns = {}
21+ self .proportions = {}
22+ self .most_used_words = {}
23+
24+ def compute_response_time_and_burst (self , list_of_messages , root_name , senders , initiation_thrs = (60 * 60 * 8 ),
25+ burst_thrs = 3 , response_thrs = (60 * 60 * 3 )):
2226 # perform the operations that are dependent on multiple messages
2327 # (response time, bursts)
2428 self .initiations = {}
@@ -27,30 +31,30 @@ def compute_response_time_and_burst(self, list_of_messages, root_name, senders,
2731 t0 = list_of_messages [0 ].datetime_obj
2832 burst_count = 1
2933 for index , message in enumerate (list_of_messages ):
30- #skip the first message since we are looking at differences; note this means we don't count first msg as init
34+ # skip the first message since we are looking at differences; note this means we don't count first msg as init
3135 if index == 0 :
3236 continue
3337 t1 = message .datetime_obj
3438 dt = t1 - t0
3539 dt .total_seconds ()
3640
3741 # print "sender %s delta %s" % ( message.sender, dt.total_seconds() )
38- if ( dt .total_seconds () > initiation_thrs ) :
42+ if dt .total_seconds () > initiation_thrs :
3943 self .initiations [message .sender ] += 1
4044
4145 # is sender the same as the last message?
42- if message .sender != list_of_messages [index - 1 ].sender :
46+ if message .sender != list_of_messages [index - 1 ].sender :
4347 # sender changed, store the burst count and reset
44- #print "sender changed: %s" % ( message.sender )
45- #print "burst count: %s" % ( burst_count )
48+ # print "sender changed: %s" % ( message.sender )
49+ # print "burst count: %s" % ( burst_count )
4650
47- #print("response time: %d\n" %(dt.total_seconds()) )
51+ # print("response time: %d\n" %(dt.total_seconds()) )
4852 # is sender the root?
4953 if message .sender == root_name :
5054 # store the burst count for the last sender, which is the
5155 # opposite of current
5256 if burst_count > burst_thrs :
53- #print "BURST CONTACT ENDED: %s IN A ROW" % ( burst_count )
57+ # print "BURST CONTACT ENDED: %s IN A ROW" % ( burst_count )
5458 self .contact_burst .append (burst_count )
5559 if dt .total_seconds () < response_thrs :
5660 self .root_response_time .append (dt .total_seconds ())
@@ -59,24 +63,24 @@ def compute_response_time_and_burst(self, list_of_messages, root_name, senders,
5963 # store the burst count for the last sender, which is the
6064 # opposite of current
6165 if burst_count > burst_thrs :
62- #print "BURST ROOT ENDED: %s IN A ROW" % ( burst_count )
66+ # print "BURST ROOT ENDED: %s IN A ROW" % ( burst_count )
6367 self .root_burst .append (burst_count )
6468 if dt .total_seconds () < response_thrs :
6569 self .contact_response_time .append (dt .total_seconds ())
66-
70+
6771 # End of the first burst, restart the counter
6872 burst_count = 1
6973
7074 else :
7175 # accumulate the number of messages sent in a row
7276 burst_count += 1
7377 t0 = t1
74- if burst_count > burst_thrs : # catch a burst if at end of chat
75- #print "final burst: %s" % ( burst_count )
76- if message .sender == root_name :
78+ if burst_count > burst_thrs : # catch a burst if at end of chat
79+ # print "final burst: %s" % ( burst_count )
80+ if message .sender == root_name :
7781 self .root_burst .append (burst_count )
7882 else :
79- self .contact_burst .append (burst_count )
83+ self .contact_burst .append (burst_count )
8084
8185 def compute_messages_per_weekday (self , list_of_messages ):
8286 self .weekday = {
@@ -105,16 +109,16 @@ def compute_messages_per_shift(self, list_of_messages):
105109 }
106110 for msg in list_of_messages :
107111 hour = int (msg .time .split (":" )[0 ])
108- if hour >= 0 and hour <= 6 :
112+ if 0 <= hour <= 6 :
109113 self .shifts ["latenight" ] += 1
110114
111- elif hour > 6 and hour <= 11 :
115+ elif 6 < hour <= 11 :
112116 self .shifts ["morning" ] += 1
113117
114- elif hour > 11 and hour <= 17 :
118+ elif 11 < hour <= 17 :
115119 self .shifts ["afternoon" ] += 1
116120
117- elif hour > 17 and hour <= 23 :
121+ elif 17 < hour <= 23 :
118122 self .shifts ["evening" ] += 1
119123 return self .shifts
120124
@@ -134,7 +138,7 @@ def compute_messages_pattern(self, list_of_messages, senders, pattern_list):
134138 if length > 0 :
135139 if pattern not in self .patterns :
136140 self .patterns [pattern ][msg .sender ] = length
137- print "This should never happen"
141+ print ( "This should never happen" )
138142 else :
139143 self .patterns [pattern ][msg .sender ] += length
140144 return self .patterns
@@ -149,10 +153,10 @@ def compute_message_proportions(self, list_of_messages, senders, root, contact):
149153 self .proportions [i ][s ] = 0
150154 for msg in list_of_messages :
151155 self .proportions ["messages" ][msg .sender ] += 1
152- self .proportions ["words" ][msg .sender ] += len (msg .content .split (" " ))
153- self .proportions ["chars" ][msg .sender ] += len (msg .content .strip ())
154- self .proportions ["qmarks" ][msg .sender ] += msg .content .count ('?' )
155- self .proportions ["exclams" ][msg .sender ] += msg .content .count ('!' )
156+ self .proportions ["words" ][msg .sender ] += len (msg .content .split (" " ))
157+ self .proportions ["chars" ][msg .sender ] += len (msg .content .strip ())
158+ self .proportions ["qmarks" ][msg .sender ] += msg .content .count ('?' )
159+ self .proportions ["exclams" ][msg .sender ] += msg .content .count ('!' )
156160 self .proportions ["media" ][msg .sender ] += (
157161 msg .content .count ('<media omitted>' ) +
158162 msg .content .count ('<image omitted>' ) +
@@ -170,24 +174,24 @@ def compute_message_proportions(self, list_of_messages, senders, root, contact):
170174 self .proportions ["avg_words" ] = {}
171175 for s in senders :
172176 self .proportions ["avg_words" ][s ] = self .proportions ["words" ][s ] / self .proportions ["messages" ][s ]
173- self .proportions ["avg_words" ]["ratio" ] = self .proportions ["avg_words" ][root ] / self .proportions ["avg_words" ][contact ]
177+ self .proportions ["avg_words" ]["ratio" ] = self .proportions ["avg_words" ][root ] / self .proportions ["avg_words" ][
178+ contact ]
174179
175180 for c in categories :
176181 self .proportions [c ]["total" ] = 0
177182 for s in senders :
178183 self .proportions [c ]["total" ] += self .proportions [c ][s ]
179-
184+
180185 for c in categories :
181-
182- #if a value is 0, replace with a 1 to avoid zero erros in ratio calcs.
186+
187+ # if a value is 0, replace it with 1 to avoid division-by-zero errors in ratio calcs.
183188 if self .proportions [c ][contact ] == 0 :
184189 self .proportions [c ][contact ] = 1
185190 if self .proportions [c ][root ] == 0 :
186- self .proportions [c ][root ] = 1
191+ self .proportions [c ][root ] = 1
187192
188193 self .proportions [c ]["ratio" ] = self .proportions [c ][root ] / self .proportions [c ][contact ]
189194
190-
191195 return self .proportions
192196
193197 def compute_most_used_words (self , list_of_messages , top = 10 , threshold = 3 ):
@@ -204,37 +208,37 @@ def compute_most_used_words(self, list_of_messages, top=10, threshold=3):
204208 words_counter [w ] = 1
205209 else :
206210 words_counter [w ] += 1
207- sorted_words = sorted (words_counter .iteritems (), key = operator .itemgetter (1 ), reverse = True )
211+ sorted_words = sorted (words_counter .items (), key = operator .itemgetter (1 ), reverse = True )
208212 self .most_used_words = sorted_words [:top ]
209213 return self .most_used_words
210214
def compute_avg_root_response_time(self):
    """Return the arithmetic mean of ``self.root_response_time``.

    Returns:
        The mean of the stored values, or ``0`` when nothing is stored.
    """
    samples = self.root_response_time
    # Guard first so the division below never sees an empty list.
    if len(samples) == 0:
        return 0
    return sum(samples) / len(samples)
215219
def compute_avg_contact_response_time(self):
    """Return the arithmetic mean of ``self.contact_response_time``.

    Returns:
        The mean of the stored values, or ``0`` when nothing is stored.
    """
    samples = self.contact_response_time
    # Guard first so the division below never sees an empty list.
    if len(samples) == 0:
        return 0
    return sum(samples) / len(samples)
220224
def compute_response_time_ratio(self, root, contact):
    """Return the root average response time divided by the contact's.

    Args:
        root: Not used by this method.
        contact: Not used by this method.

    Returns:
        ``compute_avg_root_response_time() /
        compute_avg_contact_response_time()``, or ``0`` when the contact
        average is zero.
    """
    numerator = self.compute_avg_root_response_time()
    denominator = self.compute_avg_contact_response_time()
    # A zero contact average would divide by zero; report 0 instead.
    return numerator / denominator if denominator != 0 else 0
227231
def compute_bursts_ratio(self, root, contact):
    """Return the number of root bursts divided by the number of contact bursts.

    A count of zero on one side is replaced by 1 so the ratio stays defined;
    when the contact side is empty the root count is returned as-is
    (divided by 1).

    Args:
        root: Not used by this method.
        contact: Not used by this method.

    Returns:
        The ratio of ``len(self.root_burst)`` to ``len(self.contact_burst)``
        with the zero substitutions described above.
    """
    contact_total = len(self.contact_burst)
    root_total = len(self.root_burst)
    if contact_total == 0:
        return root_total / 1
    # Contact side is non-zero here; only the root side may need the stand-in.
    numerator = root_total if root_total != 0 else 1
    return numerator / contact_total
234238
def compute_nbr_root_burst(self):
    """Return how many entries ``self.root_burst`` currently holds."""
    recorded = self.root_burst
    return len(recorded)
237-
241+
def compute_nbr_contact_burst(self):
    """Return how many entries ``self.contact_burst`` currently holds."""
    recorded = self.contact_burst
    return len(recorded)
240244
@@ -244,48 +248,41 @@ def compute_nbr_contact_burst(self):
244248 # return 0
245249
def compute_avg_contact_burst(self):
    """Return the arithmetic mean of ``self.contact_burst``.

    Returns:
        The mean of the stored burst counts, or ``0`` when none are stored.
    """
    bursts = self.contact_burst
    # Guard first so the division below never sees an empty list.
    if len(bursts) == 0:
        return 0
    return sum(bursts) / len(bursts)
250254
def compute_root_initation_ratio(self, root, contact):
    """Return root's initiation count divided by contact's.

    A count of zero on one side is replaced by 1 so the ratio stays defined;
    when the contact count is zero the root count is returned as-is
    (divided by 1).

    Args:
        root: Key of the root sender in ``self.initiations``.
        contact: Key of the contact sender in ``self.initiations``.

    Returns:
        The ratio of the two counts with the zero substitutions described
        above.

    Raises:
        KeyError: If a looked-up key is missing from ``self.initiations``.
    """
    by_contact = self.initiations[contact]
    if by_contact == 0:
        return self.initiations[root] / 1
    by_root = self.initiations[root]
    # Contact side is non-zero here; only the root side may need the stand-in.
    numerator = 1 if by_root == 0 else by_root
    return numerator / by_contact
257-
261+
def generate_outcome(self, root, contact, methodology):
    """Classify the chat from already-computed features.

    Both methodologies first test the root initiation ratio; they differ in
    the second test.

    Args:
        root: Sender key of the root user, forwarded to
            ``compute_root_initation_ratio``.
        contact: Sender key of the contact, forwarded to
            ``compute_root_initation_ratio``.
        methodology: ``0`` uses ``self.proportions["qmarks"]["ratio"]`` as
            the second test (it must have been populated beforehand);
            ``1`` uses ``compute_avg_root_response_time()``.

    Returns:
        ``0`` ("just not that into you"), ``1`` ("definitely into you"), or
        ``99`` when ``methodology`` is neither 0 nor 1.
    """
    if methodology == 0:
        if self.compute_root_initation_ratio(root, contact) > 0.867:
            return 0  # "just not that into you"
        # Direction of this inequality was deliberately flipped upstream.
        if self.proportions["qmarks"]["ratio"] > 0.87:
            return 0  # "just not that into you"
        return 1  # "definitely into you"

    if methodology == 1:
        if self.compute_root_initation_ratio(root, contact) > 0.83:
            return 0  # "just not that into you"
        # The average is computed by this class itself; the previous
        # ``self.features.…`` lookup raised AttributeError because no
        # ``features`` attribute is ever set on the instance.
        # Direction of this inequality was deliberately flipped upstream.
        if self.compute_avg_root_response_time() < 0.92:
            return 0  # "just not that into you"
        return 1  # "definitely into you"

    # Unknown methodology: sentinel value.
    return 99
0 commit comments