-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtester.py
More file actions
executable file
·104 lines (96 loc) · 4.28 KB
/
tester.py
File metadata and controls
executable file
·104 lines (96 loc) · 4.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import ndcg_score, f1_score
import numpy as np
import pandas as pd
import torch
# EASE model copied due to issues with Slurm
class EASE:
    """Item-item linear recommender (EASE) computed in closed form with torch.

    The constructor turns a user/item interaction table into a sparse
    user x item matrix, ``fit`` derives the item-item weight matrix ``B``
    from it, and ``predict`` ranks items for the users of a given frame.
    """

    def __init__(self, df, implicit=False, reg=0.05):
        """Build the sparse interaction matrix from *df*.

        Args:
            df: DataFrame with integer ``user_id`` and ``item_id`` columns
                (used directly as row/column indices) and, unless
                *implicit* is truthy, a ``rating`` column.
            implicit: When truthy every interaction gets weight 1; otherwise
                the ``rating`` column supplies the weights.
            reg: Value added to the diagonal of the Gram matrix in ``fit``.
        """
        self.reg = reg
        self.indices = torch.tensor(
            df[["user_id", "item_id"]].to_numpy(), dtype=torch.long
        )
        if implicit:
            self.values = torch.ones(self.indices.shape[0])
        else:
            self.values = torch.tensor(
                df["rating"].to_numpy(), dtype=torch.float32
            )
        # torch.sparse.FloatTensor is the legacy constructor;
        # sparse_coo_tensor is the supported equivalent. The matrix size is
        # inferred from the largest user / item index present in df.
        self.sparse = torch.sparse_coo_tensor(self.indices.t(), self.values)

    def fit(self):
        """Compute the item-item weight matrix and store it in ``self.B``."""
        # Gram matrix G = X^T X, densified because the inverse needs a dense
        # matrix.
        gram = torch.sparse.mm(self.sparse.t(), self.sparse).to_dense()
        # Regularise by adding reg to the diagonal in place (no dense
        # identity matrix is materialised).
        gram.diagonal().add_(self.reg)
        inverse = gram.inverse()
        # B[i, j] = -P[i, j] / P[j, j]; broadcasting divides column j by
        # -P[j, j]. The diagonal of that quotient is exactly -1, so zeroing
        # it is the same as adding the identity.
        weights = inverse / (-inverse.diag())
        weights.fill_diagonal_(0)
        self.B = weights
        return

    def predict(self, pred_df, k=5, remove_owned=True):
        """Return the top-*k* item indices for every distinct user in *pred_df*.

        Args:
            pred_df: DataFrame whose ``user_id`` column holds row indices of
                the interaction matrix built in ``__init__``.
            k: Number of items to return per user; clamped to the number of
                items so ``topk`` cannot fail on small catalogues.
            remove_owned: When truthy, items the user already interacted
                with are pushed to the bottom of the ranking.

        Returns:
            DataFrame with columns ``user_id`` and ``predicted_items`` (one
            NumPy array of item indices per user, best first).
        """
        unique_users = torch.as_tensor(
            pred_df["user_id"].unique(), dtype=torch.long
        )
        user_tensor = self.sparse.to_dense().index_select(
            dim=0, index=unique_users
        )
        scores = user_tensor @ self.B
        if remove_owned:
            # Subtracting the interaction value does not guarantee exclusion
            # when an owned item scores highly; masking with -inf does.
            scores = scores.masked_fill(user_tensor != 0, float("-inf"))
        k = min(k, scores.shape[1])
        top_k_indices = scores.topk(k, dim=1).indices
        pred_items = [row.numpy() for row in top_k_indices]
        # Hand pandas a NumPy array rather than a torch tensor so the
        # user_id column holds plain integers on every pandas version.
        return pd.DataFrame(
            {"user_id": unique_users.numpy(), "predicted_items": pred_items}
        )
# Copy of metric module due to issues with Slurm
def _ndcg_from_relevance(relevance):
    """Return NDCG for a 0/1 relevance vector given in ranked order."""
    if relevance.size == 0:
        return 0
    discounts = 1.0 / np.log2(np.arange(2, relevance.size + 2))
    # Ideal DCG: the same relevance values sorted best-first.
    ideal = float(np.sort(relevance)[::-1] @ discounts)
    if ideal == 0:
        return 0.0
    return float(relevance @ discounts) / ideal


def calculate_metrics(predictions, ground_truth, k):
    """Compute mean precision@k, recall@k, NDCG@k and an F1 score.

    Args:
        predictions: DataFrame with columns ``user_id`` and
            ``predicted_items`` (a ranked sequence of items per user).
        ground_truth: DataFrame with columns ``user_id`` and
            ``actual_items`` (the relevant items per user).
        k: Only the first *k* predicted items of each user are scored.

    Returns:
        Tuple ``(avg_precision, avg_recall, avg_ndcg, avg_f1)``. The F1 value
        is the harmonic mean of the averaged precision and recall. All
        values are 0 when *predictions* has no rows.
    """
    # Build one lookup per frame up front instead of filtering the whole
    # DataFrame for every user (which made the loop quadratic). setdefault
    # keeps the first row per user, matching the previous `.iloc[0]`.
    predicted_by_user = {}
    for uid, items in zip(predictions['user_id'], predictions['predicted_items']):
        predicted_by_user.setdefault(uid, items)
    actual_by_user = {}
    for uid, items in zip(ground_truth['user_id'], ground_truth['actual_items']):
        actual_by_user.setdefault(uid, items)

    precision_at_k = []
    recall_at_k = []
    ndcg_at_k = []
    for user_id in predictions['user_id']:
        # De-duplicate while keeping rank order; a plain set() would discard
        # the ranking that NDCG depends on.
        ranked_items = list(dict.fromkeys(predicted_by_user[user_id][:k]))
        actual_items = set(actual_by_user.get(user_id, ()))

        relevance = np.array(
            [1.0 if item in actual_items else 0.0 for item in ranked_items]
        )
        num_relevant_items = int(relevance.sum())

        precision_at_k.append(
            num_relevant_items / len(ranked_items) if ranked_items else 0
        )
        recall_at_k.append(
            num_relevant_items / len(actual_items) if actual_items else 0
        )
        # Rank-aware NDCG computed directly; also defined for a single item.
        ndcg_at_k.append(_ndcg_from_relevance(relevance))

    avg_precision = np.mean(precision_at_k) if precision_at_k else 0
    avg_recall = np.mean(recall_at_k) if recall_at_k else 0
    avg_ndcg = np.mean(ndcg_at_k) if ndcg_at_k else 0
    if (avg_precision + avg_recall) > 0:
        avg_f1 = 2 * (avg_precision * avg_recall) / (avg_precision + avg_recall)
    else:
        avg_f1 = 0
    return avg_precision, avg_recall, avg_ndcg, avg_f1
# Get a dataframe from a csv file
def get_data(i, data_dir='data'):
    """Read data shard *i* into a DataFrame.

    Args:
        i: Shard number; the file read is ``data_<i>.d``.
        data_dir: Directory containing the shard files. Defaults to
            ``'data'``, relative to the current working directory.

    Returns:
        The DataFrame produced by ``pandas.read_csv`` for that file.
    """
    return pd.read_csv(os.path.join(data_dir, f'data_{i}.d'))
def main():
    """Score a saved item-item matrix against this process's data shard.

    Reads the process rank from ``SLURM_PROCID`` (default 0) and the stride
    from ``WORLDSIZE`` (default 1), loads data files ``rank+1, rank+1+stride,
    ...`` up to 20, label-encodes users and items, loads the weight matrix
    from ``log/overall`` into an ``EASE`` model, predicts the top 10 items
    per user without removing owned items, and prints precision, recall,
    NDCG and F1 at 10 measured against the same interactions.

    Raises:
        ValueError: If the rank/stride combination selects no data file.
    """
    num_data_files = 20
    k = 10

    s = int(os.environ.get("WORLDSIZE", 1))
    n = int(os.environ.get("SLURM_PROCID", 0))

    user_encoder = LabelEncoder()
    item_encoder = LabelEncoder()

    # Collect the shards first and concatenate once: concatenating inside
    # the loop re-copies the accumulated frame on every iteration and relies
    # on pandas' deprecated handling of an empty seed DataFrame.
    frames = [get_data(i) for i in range(n + 1, num_data_files + 1, s)]
    if not frames:
        raise ValueError(
            f"no data files selected for rank {n} with stride {s} "
            f"(num_data_files={num_data_files})"
        )
    df = pd.concat(frames)

    # NOTE(review): the encoders are fitted on this process's shard only;
    # presumably 'log/overall' was produced with the same item index
    # mapping - confirm when running with more than one process.
    df["user_id"] = user_encoder.fit_transform(df["user_id"])
    df["item_id"] = item_encoder.fit_transform(df["item_id"])

    # NOTE(review): this call used to be `EASE(df, 10)`, which binds 10 to
    # `implicit` (truthy), not to `reg`. Spelled out here with identical
    # behaviour; confirm that implicit feedback is what was intended.
    model = EASE(df, implicit=True)
    # SECURITY: torch.load unpickles the file; only load trusted checkpoints.
    model.B = torch.load('log/overall')

    predictions = model.predict(df, k, remove_owned=False)
    predictions['user_id'] = user_encoder.inverse_transform(predictions['user_id'])
    predictions['predicted_items'] = predictions['predicted_items'].apply(
        lambda x: item_encoder.inverse_transform(x)
    )

    ground_truth = df.groupby('user_id')['item_id'].apply(list).reset_index()
    ground_truth.rename(columns={'item_id': 'actual_items'}, inplace=True)
    ground_truth['user_id'] = user_encoder.inverse_transform(ground_truth['user_id'])
    ground_truth['actual_items'] = ground_truth['actual_items'].apply(
        lambda x: item_encoder.inverse_transform(x)
    )

    precision, recall, ndcg, f1 = calculate_metrics(predictions, ground_truth, k)
    print(f"Precision@{k}: {precision}")
    print(f"Recall@{k}: {recall}")
    print(f"NDCG@{k}: {ndcg}")
    print(f"F1@{k}: {f1}")
# Run the evaluation only when this file is executed as a script.
if __name__ == "__main__":
    main()