-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathMetric.py
More file actions
executable file
·96 lines (83 loc) · 3.51 KB
/
Metric.py
File metadata and controls
executable file
·96 lines (83 loc) · 3.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/usr/bin/python
#-*-coding:utf-8-*-
'''@author:duncan'''
import numpy as np
from networkx.classes.function import all_neighbors
GroundTruth_Path = "./GroundTruth/"
class Metrics:
def __init__(self,users,R,g):
# 用户数据
self.users = users
# 用户总个数
self.num = len(users)
# 代表性矩阵
self.R = R
# 在图中对应的节点编号
# 关系图
self.g = g
# 转换一下索引结构
self.index = {userid: users[users.userid == userid].index[0] for userid in users.userid}
# 根据pandas里的索引返回对应的userid
def GetID(self,profiles):
return list(self.users.loc[profiles]['userid'])
# 获得用户的邻居
def GetNeighbours(self,u):
if str(u) in self.g:
return set([int(i) for i in all_neighbors(self.g,str(u))])
else:
return set()
# 计算特征代表性
def Rc(self,users,profiles):
'''
:param users: DataFrame类型
:param profiles: 代表性候选集
:param R: 代表性矩阵
:return: 返回代表性值
'''
# 找到profiles对应的index
# rows.append(users[(users['userid'] == u)].index[0])
rows = [self.index[u] for u in profiles]
cols = [self.index[u] for u in users]
row_matrix = self.R[rows,:]
# R中找rows中每列最大值
return np.sum(np.max(row_matrix[:,cols],axis=0)) / len(cols)
# R中找rows中的每列平均值
# return np.sum(np.mean(R[rows,:],axis=0),axis=0) / self.num
# 计算结构代表性
def Rt(self,profiles):
# neighbours = reduce(lambda x,y:x | y,[set(all_neighbors(self.g,self.id_dic[str(u)])) if self.id_dic[str(u)] in self.g else set() for u in profiles])
neighbours = reduce(lambda x,y:x | y,[set(all_neighbors(self.g,str(u))) if str(u) in self.g else set() for u in profiles])
# 所有相邻的
NS = len(neighbours - set(profiles))
VS = self.num - len(profiles)
return 1.0 * NS / VS
# 计算结构代表性
def Rt(self,users,profiles):
# neighbours = reduce(lambda x,y:x | y,[set(all_neighbors(self.g,self.id_dic[str(u)])) if self.id_dic[str(u)] in self.g else set() for u in profiles])
neighbours = reduce(lambda x,y:x | y,[set(all_neighbors(self.g,str(u))) if str(u) in self.g else set() for u in profiles])
# 所有相邻的
NS = len(neighbours - set(profiles) & set(users))
VS = len(users)
return 1.0 * NS / VS
# 目标函数
def RScore(self,profiles):
rc = round(self.Rc(self.users,profiles,self.R),3)
rt = round(self.Rt(profiles),3)
return rc,rt,round(2.0 * rc * rt / (rc + rt),3)
def ReadUsers(self,path):
with open(path,'r') as f:
lines = f.read()
profiles = set(lines.split(","))
profiles = set([profile.strip() for profile in profiles if len(profile.strip()) != ""])
return profiles
# 计算找出的代表性人物的准确率和召回率,以及 F1-score
def PR(self,profiles,category,alpha,dataset):
# 读取标注好的代表性人物
path = GroundTruth_Path + dataset + "/" + category + "/" + str(alpha)
true_profiles = self.ReadUsers(path)
p = len(profiles & true_profiles) * 1.0 / len(profiles)
r = len(profiles & true_profiles) * 1.0 / len(true_profiles)
if p == 0 and r == 0:
return 0,0,0
f1 = 2 * p * r / (p + r)
return p,r,f1