-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathkmeans.cpp
More file actions
125 lines (108 loc) · 3.42 KB
/
kmeans.cpp
File metadata and controls
125 lines (108 loc) · 3.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#include <unistd.h>
#include "kmeans.h"
#include "io.h"
/**
 * Writes the command-line help text to stderr.
 *
 * The first line is the synopsis, with prog_name substituted for the
 * program name; the remaining lines describe each option in turn.
 */
void print_usage(char *prog_name) {
    /* One entry per option, emitted verbatim in this order. */
    static const char *const option_help[] = {
        " -k: number of clusters\n",
        " -d: dimension of points\n",
        " -i: input file name\n",
        " -m: maximum iterations\n",
        " -t: convergence threshold\n",
        " -c: output centroids (default: output assignments)\n",
        " -s: random seed\n",
        " -v: version (0=CPU, 1=CUDA Basic, 2=CUDA Shared Memory, 3=Thrust)\n",
    };
    const int option_count = (int)(sizeof option_help / sizeof option_help[0]);

    fprintf(stderr, "Usage: %s -k <clusters> -d <dims> -i <input_file> -m <max_iter> -t <threshold> [-c] -s <seed> -v <version>\n", prog_name);

    for (int line = 0; line < option_count; ++line) {
        fputs(option_help[line], stderr);
    }
}
/**
 * Seeds the k-means random generator and fills `centers` with k points
 * sampled from `points`.
 *
 * Both arrays are treated as flat blocks of d doubles per row: dimension j
 * of point p is read from points[p * d + j], and dimension j of center i is
 * written to centers[i * d + j].
 *
 * Indices are drawn as kmeans_rand() % n and redrawn while they hit a point
 * that was already chosen, so the centers come from distinct points whenever
 * k <= n. The sequence of kmeans_rand() calls for k <= n is unchanged from
 * the plain rejection loop.
 *
 * @param points   n * d input coordinates (read only here)
 * @param centers  k * d output coordinates
 * @param n        number of points
 * @param k        number of centers to pick
 * @param d        number of coordinates per point
 * @param seed     value passed to kmeans_srand()
 *
 * Degenerate inputs:
 *  - n, k or d <= 0: the generator is seeded and nothing else happens.
 *  - k > n: only the first n centers can be distinct; the remaining ones
 *    are drawn without the uniqueness check instead of looping forever.
 *  - allocation failure: reports on stderr and exits with EXIT_FAILURE.
 */
void initialize_centers(double *points, double *centers, int n, int k, int d, int seed) {
    kmeans_srand(seed);

    /* Nothing to sample from or into; this also keeps `% n` away from zero. */
    if (n <= 0 || k <= 0 || d <= 0) {
        return;
    }

    /* selected[p] != 0 once point p has been used as a center. */
    int *selected = (int *)calloc((size_t)n, sizeof(int));
    if (!selected) {
        fprintf(stderr, "initialize_centers: out of memory while tracking %d points\n", n);
        exit(EXIT_FAILURE);
    }

    for (int i = 0; i < k; i++) {
        /* After n picks every point is marked, so rejecting duplicates
         * would never terminate; allow repeats from that point on. */
        const int need_unique = i < n;
        int index;
        do {
            index = kmeans_rand() % n;
        } while (need_unique && selected[index]);
        selected[index] = 1;

        /* Widen before multiplying so large n * d does not overflow int.
         * NOTE(review): assumes kmeans_rand() is non-negative - confirm. */
        const size_t src = (size_t)index * (size_t)d;
        const size_t dst = (size_t)i * (size_t)d;
        for (int j = 0; j < d; j++) {
            centers[dst + j] = points[src + j];
        }
    }

    free(selected);
}
/**
 * Command-line driver for the k-means implementations.
 *
 * Parses the options described by print_usage(), loads the points with
 * read_points(), picks the starting centers with initialize_centers(), runs
 * the implementation selected by -v (0=CPU, 1=CUDA Basic, 2=CUDA Shared
 * Memory, 3=Thrust) and prints "<iterations>,<total_time / iterations>"
 * followed by either the centroids (-c) or the per-point assignments.
 *
 * Defaults when the option is omitted: max_iter 150, threshold 1e-5,
 * version 0; every other field of `args` starts zeroed.
 *
 * @return 0 on success; 1 on bad arguments, unreadable input, too few
 *         points for the requested k, or allocation failure.
 */
int main(int argc, char *argv[]) {
    kmeans_args args = {0};
    args.max_iter = 150;
    args.threshold = 1e-5;
    args.version = 0;

    int opt;
    while ((opt = getopt(argc, argv, "k:d:i:m:t:cs:v:")) != -1) {
        switch (opt) {
            case 'k':
                args.k = atoi(optarg);
                break;
            case 'd':
                args.d = atoi(optarg);
                break;
            case 'i':
                args.input_file = optarg;
                break;
            case 'm':
                args.max_iter = atoi(optarg);
                break;
            case 't':
                args.threshold = atof(optarg);
                break;
            case 'c':
                args.output_centroids = true;
                break;
            case 's':
                args.seed = atoi(optarg);
                break;
            case 'v':
                args.version = atoi(optarg);
                break;
            default:
                print_usage(argv[0]);
                return 1;
        }
    }

    if (args.k <= 0 || args.d <= 0 || !args.input_file) {
        print_usage(argv[0]);
        return 1;
    }

    /* Reject an unknown version before touching the input file, so this
     * path has nothing to clean up. */
    if (args.version < 0 || args.version > 3) {
        print_usage(argv[0]);
        return 1;
    }

    int n = 0;
    double *points = read_points(args.input_file, &n, args.d);
    if (!points) {
        return 1;
    }

    /* Center selection draws distinct points, which is impossible (and
     * would not terminate) with fewer than k of them. */
    if (n < args.k) {
        fprintf(stderr, "Error: %d clusters requested but only %d points were read from %s\n",
                args.k, n, args.input_file);
        free_points(points);
        return 1;
    }
    args.n = n;

    /* Widen to size_t before multiplying so k * d cannot overflow int. */
    double *centers = (double *)malloc((size_t)args.k * (size_t)args.d * sizeof(double));
    int *assignments = (int *)malloc((size_t)n * sizeof(int));

    int status = 1;
    if (!centers || !assignments) {
        fprintf(stderr, "Error: out of memory allocating centers/assignments\n");
    } else {
        initialize_centers(points, centers, n, args.k, args.d, args.seed);

        kmeans_result result;
        switch (args.version) {
            case 0:
                result = kmeans_cpu(points, centers, assignments, &args);
                break;
            case 1:
                result = kmeans_kernel(points, centers, assignments, &args);
                break;
            case 2:
                result = kmeans_shared(points, centers, assignments, &args);
                break;
            default: /* version 3; anything else was rejected above */
                result = kmeans_thrust(points, centers, assignments, &args);
                break;
        }

        /* Report 0 rather than dividing by zero when no iteration ran. */
        printf("%d,%lf\n", result.iterations,
               result.iterations > 0 ? result.total_time / result.iterations : 0.0);

        if (args.output_centroids) {
            write_centroids(centers, args.k, args.d);
        } else {
            write_assignments(assignments, n);
        }
        status = 0;
    }

    /* Single release point for every path that got past read_points();
     * free(NULL) is a no-op, so a partial allocation is handled too. */
    free_points(points);
    free(centers);
    free(assignments);
    return status;
}