forked from haelinnie/codeology-fa19-dev
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathUtility.py
More file actions
64 lines (54 loc) · 2.08 KB
/
Utility.py
File metadata and controls
64 lines (54 loc) · 2.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# -*- SIMILARITY -*-
def cos_sim(a, b):
    """
    Return the cosine similarity between vector a and b.

    The result is dot(a, b) / sqrt(sum(a_i^2) * sum(b_i^2)).

    a, b: equal-length sequences of numbers, paired position by position.

    Returns 0.0 when the denominator is zero (at least one of the vectors
    has no non-zero component), since the similarity is undefined there
    and dividing would raise ZeroDivisionError.

    Raises ValueError if a and b have different lengths.
    """
    import math  # local import: the file has no top-level import section

    if len(a) != len(b):
        raise ValueError("cos_sim requires vectors of the same length")

    numerator, sos_a, sos_b = 0, 0, 0  # dot product and the two sums of squares
    for rating_a, rating_b in zip(a, b):
        numerator += rating_a * rating_b
        sos_a += rating_a * rating_a
        sos_b += rating_b * rating_b

    denom = math.sqrt(sos_a * sos_b)
    if denom == 0:
        # Zero-magnitude vector: report "no similarity" rather than divide by 0.
        return 0.0
    return numerator / denom
def adj_cos_sim(a, b, avg_a, avg_b):
    """
    Return the adjusted cosine similarity between vector a and b.

    Every value in a is shifted down by avg_a and every value in b by
    avg_b (so shifted values may be negative); the two shifted lists are
    then handed to cos_sim and its result is returned unchanged.
    """
    # TODO: a and b are iterated directly here; revisit if they stop being
    # plain vectors (e.g. after a move to an OOP design).
    centered_a = [rating - avg_a for rating in a]
    centered_b = [rating - avg_b for rating in b]
    return cos_sim(centered_a, centered_b)
# -*- DATA RETRIEVAL -*-
def listGenres(movieId):
    """
    Return the genres of the movie with the given ID as a list of strings.

    Reads the 'genres' cell for movieId from the module-level `movies`
    object via `.at[...]` and splits that single '|'-delimited string
    into its parts.
    """
    # NOTE: `movies` is not defined in this function; it is presumably the
    # movies dataframe -- rename here to match whatever variable holds it.
    return movies.at[movieId, 'genres'].split('|')
# -*- MODEL EVALUATION -*-
def mae(predicted, actual):
    """
    Takes in a list of predicted ratings and a list of actual ratings for a set
    of movies and computes the mean absolute error of our predictions.

    predicted, actual: equal-length sequences of numbers in matching order
    (predicted[i] is the prediction for actual[i]).

    Returns sum(|predicted[i] - actual[i]|) / len(predicted).

    Raises ValueError if the two sequences differ in length or are empty.
    """
    if len(predicted) != len(actual):
        # Without this check, extra entries in `actual` would be ignored silently.
        raise ValueError("predicted and actual must have the same length")
    if len(predicted) == 0:
        raise ValueError("cannot compute the error of zero predictions")

    abs_error_total = sum(abs(p - a) for p, a in zip(predicted, actual))
    return abs_error_total / len(predicted)
def rmse(predicted, actual):
    """
    Takes in a list of predicted ratings and a list of actual ratings for a set
    of movies and computes the root mean square error of our predictions.

    predicted, actual: equal-length sequences of numbers in matching order
    (predicted[i] is the prediction for actual[i]).

    Returns sqrt(sum((predicted[i] - actual[i]) ** 2) / len(predicted)).

    Raises ValueError if the two sequences differ in length or are empty.
    """
    import math  # local import: the file has no top-level import section

    if len(predicted) != len(actual):
        # Without this check, extra entries in `actual` would be ignored silently.
        raise ValueError("predicted and actual must have the same length")
    if len(predicted) == 0:
        raise ValueError("cannot compute the error of zero predictions")

    squared_error_total = sum((p - a) ** 2 for p, a in zip(predicted, actual))
    return math.sqrt(squared_error_total / len(predicted))