-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathevaluate.py
More file actions
117 lines (98 loc) · 4.41 KB
/
evaluate.py
File metadata and controls
117 lines (98 loc) · 4.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
from skfeature.utility import unsupervised_evaluation
from util import load_dataset
import nxmetis
import numpy as np
import networkx as nx
import time
import argparse
import warnings
warnings.filterwarnings("ignore")
# Datasets evaluated by the __main__ driver; each one becomes a row of the
# accuracy tables that the driver writes out.
DATASETS = ['COIL20', 'colon', 'gisette', 'Lung-Cancer', 'madelon', 'Movementlibras', 'nci9', 'ORL', 'Sonar', 'UAV1', 'UAV2', 'waveform-5000']
# Number of features picked from every graph partition.
SELECT_PER_CLUSTER = 1
# Epoch tags of the saved relation matrices to evaluate
# (results/matrix/DEC_<dataset>_<epoch>_relation.csv).
EPOCHS = [1, 50, 100, 150, 200, 250, 300]

# Command-line interface. NOTE: parsing happens at import time, so importing
# this module from another program consumes that program's sys.argv.
parser = argparse.ArgumentParser(
    description='Evaluate graph-partition feature selection on saved relation matrices.')
parser.add_argument('-a', '--alpha', type=float, default=1.0,
                    help='alpha value; in this script it only tags the output CSV file names '
                         '(default: 1.0)')
parser.add_argument('-b', '--beta', type=float, default=0.001,
                    help='beta value; in this script it only tags the output CSV file names '
                         '(default: 0.001)')
parser.add_argument('-p', '--percent', type=float, default=0.1,
                    help='fraction of the features to select, e.g. 0.1 keeps one tenth '
                         '(default: 0.1)')
args = parser.parse_args()
# Fraction of features to keep; main() multiplies it by the feature count.
NUM_SELECT = args.percent
def _scale_to_int_weights(adj, scale=1000):
    """Rescale a non-negative weight matrix to integers in ``[0, scale]``.

    An all-zero matrix is returned as integer zeros instead of dividing by
    zero (which would yield NaN weights).
    """
    peak = np.max(adj)
    if peak <= 0:
        return np.zeros(adj.shape, dtype=int)
    # Builtin ``int`` replaces the ``np.int`` alias, which NumPy 1.24 removed.
    return (adj / peak * scale).astype(int)


def _pick_cluster_representatives(adj, parts, num_features, per_cluster):
    """Flag the most strongly connected member(s) of every partition.

    A member's score is the sum of its ``adj`` weights to all members of the
    same partition. Up to ``per_cluster`` not-yet-selected members with the
    highest score are flagged per partition (ties go to the earliest member).

    Returns a float vector of length ``num_features`` holding 1 for selected
    features and 0 otherwise.
    """
    select = np.zeros(num_features)
    for cluster in parts:
        members = np.asarray(cluster, dtype=int)
        if members.size == 0:
            continue
        # Row sums of the cluster's sub-matrix = intra-cluster weight per member.
        intra_weight = adj[np.ix_(members, members)].sum(axis=1)
        for _ in range(per_cluster):
            remaining = np.flatnonzero(select[members] == 0)
            if remaining.size == 0:
                break  # cluster is smaller than per_cluster
            best = remaining[np.argmax(intra_weight[remaining])]
            select[members[best]] = 1
    return select


def main(DATASET, epoch):
    """Select features for one dataset/epoch and score them by clustering.

    Loads the dataset and the relation matrix saved for ``epoch``, partitions
    the feature graph with METIS, keeps ``SELECT_PER_CLUSTER`` representative
    feature(s) per partition, and evaluates the selected features 20 times
    with ``unsupervised_evaluation.evaluation``.

    Side effect: writes the 0/1 selection vector to
    ``DEC_evaluation/selected/DEC_<DATASET>_selected_<NUM_SELECT>.csv``.
    NOTE(review): the file name does not include ``epoch``, so each epoch
    overwrites the previous one for the same dataset -- confirm this is intended.

    Args:
        DATASET: dataset name understood by ``load_dataset``.
        epoch: epoch tag used in the relation-matrix file name.

    Returns:
        Tuple ``(mean_nmi, mean_acc, count, std_nmi, std_acc)`` where
        ``count`` is the number of selected features.
    """
    import os  # local import: only needed to create the output directory

    n_repeats = 20  # clustering is repeated and the scores are averaged

    # load data
    X, y, NUM_CLASSES, num_features = load_dataset(DATASET)
    adj = np.loadtxt('results/matrix/DEC_' + DATASET + '_' + str(epoch) + '_relation.csv', delimiter=",")
    # Use relation magnitudes and drop self-relations; after abs() every
    # entry is already non-negative, so no further clipping is needed.
    adj = np.abs(adj)
    np.fill_diagonal(adj, 0)

    num_selected = int(NUM_SELECT * num_features)
    # Ceiling division: enough partitions to reach num_selected features.
    num_parts = (num_selected + SELECT_PER_CLUSTER - 1) // SELECT_PER_CLUSTER

    # Graph Segmentation =============================================================
    # Weights are converted to integers before partitioning (presumably
    # because METIS works on integer edge weights -- see nxmetis docs).
    graph = nx.from_numpy_array(_scale_to_int_weights(adj))
    _, parts = nxmetis.partition(graph, num_parts, recursive=False)

    select = _pick_cluster_representatives(adj, parts, num_features, SELECT_PER_CLUSTER)
    count = int(np.count_nonzero(select))

    # Keep only the selected feature columns; rows stay samples.
    selected_features = np.asarray(X)[:, select == 1]

    # perform kmeans clustering based on the selected features and repeats 20 times
    runs = [
        unsupervised_evaluation.evaluation(X_selected=selected_features, n_clusters=NUM_CLASSES, y=y)
        for _ in range(n_repeats)
    ]
    nmi_total = np.array([nmi for nmi, _ in runs])
    acc_total = np.array([acc for _, acc in runs])

    out_dir = 'DEC_evaluation/selected'
    os.makedirs(out_dir, exist_ok=True)
    np.savetxt(out_dir + '/DEC_' + DATASET + '_selected_' + str(NUM_SELECT) + '.csv', select, delimiter = ',')
    return np.mean(nmi_total), np.mean(acc_total), count, np.std(nmi_total), np.std(acc_total)
if __name__ == '__main__':
    # Driver: evaluate every dataset at every epoch, then write the accuracy
    # mean/std tables (one row per dataset, one column per epoch).
    acc_rows, acc_std_rows = [], []
    nmi_rows, nmi_std_rows = [], []
    start = time.time()
    lap_start = time.time()

    for dataset in DATASETS:
        # Each entry is main()'s tuple: (nmi, acc, count, std_nmi, std_acc).
        runs = [main(dataset, epoch) for epoch in EPOCHS]

        acc_rows.append(np.array([run[1] for run in runs]))
        acc_std_rows.append(np.array([run[4] for run in runs]))
        nmi_rows.append(np.array([run[0] for run in runs]))
        nmi_std_rows.append(np.array([run[3] for run in runs]))

        # The reported count is the one from the last epoch evaluated.
        count = runs[-1][2]
        print(dataset, '----Selected:', count, '----Time:', time.time() - lap_start, acc_rows[-1])
        lap_start = time.time()

    res_acc = np.array(acc_rows)
    res_acc_std = np.array(acc_std_rows)
    # NOTE: the NMI tables are assembled but never written to disk.
    res_nmi = np.array(nmi_rows)
    res_nmi_std = np.array(nmi_std_rows)

    # Shared file-name suffix: <alpha>_<beta>_<fraction>.csv
    suffix = str(args.alpha) + '_' + str(args.beta) + '_' + str(NUM_SELECT) + '.csv'
    np.savetxt('DEC_evaluation/DEC_acc_' + suffix, res_acc, delimiter = ',')
    np.savetxt('DEC_evaluation/DEC_acc_std_' + suffix, res_acc_std, delimiter = ',')
    print("TOTAL TIME:", time.time() - start, res_acc)