-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathmatrix_factorization.py
More file actions
122 lines (105 loc) · 3.23 KB
/
matrix_factorization.py
File metadata and controls
122 lines (105 loc) · 3.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# -*- coding: utf-8 -*-
"""
Created on Sat Oct 19 12:24:43 2019
@author: HARSH
"""
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
#<------------------loading the data------------->
import pickle
(f,g,h) = open("um2r.json","rb"), open("user2movie.json","rb"),open("movie2user.json","rb")
user2movie = pickle.load(g)
movie2user = pickle.load(h)
usermovie2rating = pickle.load(f)
f.close()
g.close()
h.close()
i = open("usermovie2rating_test.json","rb")
usermovie2rating_test = pickle.load(i)
i.close()
#<---------------defining sum of squared losses----------------------------->
def loss(d):
sse =0
for (user,movie),rating in d.items():
my_pred = W[user].dot(U[movie]) + b[user] + c[movie] + mu
loss = my_pred - rating
se = loss**2
sse+=se
return sse/len(d)
#<-------------------------------randomly initaialising the paraeters----------->
K = 10
N= len(user2movie)
M = len(movie2user)
W = np.random.normal(3.4283012978499814,1.0220310184294517,(N,K))
b = np.zeros(N)
U = np.random.normal(3.4283012978499814,1.0220310184294517,(M,K))
c = np.zeros(M)
mu = np.mean(list(usermovie2rating.values()))
#<---------------model----------------------->\
train_losses,test_losses =[],[]
epochs = 25
reg = 0.23
for epoch in range(epochs):
print("epoch:", epoch)
epoch_start = datetime.now()
# perform updates
# update W and b
t0 = datetime.now()
for i in range(N):
# for W
matrix = np.eye(K) * reg
vector = np.zeros(K)
# for b
bi = 0
for j in user2movie[i]:
r = usermovie2rating[(i,j)]
matrix += np.outer(U[j], U[j])
vector += (r - b[i] - c[j] - mu)*U[j]
bi += (r - W[i].dot(U[j]) - c[j] - mu)
# set the updates
W[i] = np.linalg.solve(matrix, vector)
b[i] = bi / (len(user2movie[i]) + reg)
if i % (N//10) == 0:
print("i:", i, "N:", N)
print("updated W and b:", datetime.now() - t0)
# update U and c
t0 = datetime.now()
for j in range(M):
# for U
matrix = np.eye(K) * reg
vector = np.zeros(K)
# for c
cj = 0
try:
for i in movie2user[j]:
r = usermovie2rating[(i,j)]
matrix += np.outer(W[i], W[i])
vector += (r - b[i] - c[j] - mu)*W[i]
cj += (r - W[i].dot(U[j]) - b[i] - mu)
# set the updates
U[j] = np.linalg.solve(matrix, vector)
c[j] = cj / (len(movie2user[j]) + reg)
if j % (M//10) == 0:
print("j:", j, "M:", M)
except KeyError:
# possible not to have any ratings for a movie
pass
print("updated U and c:", datetime.now() - t0)
print("epoch duration:", datetime.now() - epoch_start)
# store train loss
t0 = datetime.now()
train_losses.append(loss(usermovie2rating))
# store test loss
test_losses.append(loss(usermovie2rating_test))
print("calculate cost:", datetime.now() - t0)
print("train loss:", train_losses[-1])
print("test loss:", test_losses[-1])
print("train losses:", train_losses)
print("test losses:", test_losses)
# plot losses
plt.plot(train_losses, label="train loss")
plt.scatter(range(25),test_losses, label="test loss")
plt.legend()
plt.show()
t1 = datetime.now()