Skip to content

Commit 1dd5f1c

Browse files
committed
link clean-up
1 parent 87d75b8 commit 1dd5f1c

File tree

5 files changed

+47
-41
lines changed

5 files changed

+47
-41
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@
88
/docs/site/
99
/benchmark/tune.json
1010
.benchmarkci/
11+
.idea/*

LICENSE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
Copyright (c) 2020 Andrey Oskin
1+
Copyright (c) 2020 Bernard Brenyah & Andrey Oskin
22

33
Permission is hereby granted, free of charge, to any person obtaining a copy
44
of this software and associated documentation files (the "Software"), to deal

README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# ParallelKMeans
22

3-
[![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://Arkoniak.github.io/ParallelKMeans.jl/stable)
4-
[![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://Arkoniak.github.io/ParallelKMeans.jl/dev)
5-
[![Build Status](https://travis-ci.com/Arkoniak/ParallelKMeans.jl.svg?branch=master)](https://travis-ci.com/Arkoniak/ParallelKMeans.jl)
6-
[![Coveralls](https://coveralls.io/repos/github/Arkoniak/ParallelKMeans.jl/badge.svg?branch=master)](https://coveralls.io/github/Arkoniak/ParallelKMeans.jl?branch=master)
3+
[![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://PyDataBlog.github.io/ParallelKMeans.jl/stable)
4+
[![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://PyDataBlog.github.io/ParallelKMeans.jl/dev)
5+
[![Build Status](https://www.travis-ci.org/PyDataBlog/ParallelKMeans.jl.svg?branch=master)](https://www.travis-ci.org/PyDataBlog/ParallelKMeans.jl)
6+
[![Coveralls](https://coveralls.io/repos/github/PyDataBlog/ParallelKMeans.jl/badge.svg?branch=master)](https://coveralls.io/github/PyDataBlog/ParallelKMeans.jl?branch=master)

docs/make.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,12 @@ makedocs(;
66
pages=[
77
"Home" => "index.md",
88
],
9-
repo="https://github.com/Arkoniak/ParallelKMeans.jl/blob/{commit}{path}#L{line}",
9+
repo="https://github.com/PyDataBlog/ParallelKMeans.jl/blob/{commit}{path}#L{line}",
1010
sitename="ParallelKMeans.jl",
11-
authors="Andrey Oskin",
11+
authors="Bernard Brenyah & Andrey Oskin",
1212
assets=String[],
1313
)
1414

1515
deploydocs(;
16-
repo="github.com/Arkoniak/ParallelKMeans.jl",
16+
repo="github.com/PyDataBlog/ParallelKMeans.jl",
1717
)

src/ParallelKMeans.jl

Lines changed: 38 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,23 @@
11
module ParallelKMeans
2-
########################################
3-
# Based on discourse discussion
4-
# https://discourse.julialang.org/t/optimization-tips-for-my-julia-code-can-i-make-it-even-faster-and-or-memory-efficient/34614/20
2+
53

64
using StatsBase
75
import Base.Threads: @spawn, @threads
86

97
export kmeans
108

9+
"""
10+
TODO: Document function
11+
"""
1112
function divider(n, k)
1213
d = div(n, k)
1314
xz = vcat(collect((0:k-1) * d), n)
1415
return [t[1]:t[2] for t in zip(xz[1:end-1] .+ 1, xz[2:end])]
1516
end
1617

18+
"""
19+
TODO: Document function
20+
"""
1721
function pl_pairwise!(target, x, y, nth = Threads.nthreads())
1822
ncol = size(x, 2)
1923
nrow = size(x, 1)
@@ -30,6 +34,9 @@ function pl_pairwise!(target, x, y, nth = Threads.nthreads())
3034
target
3135
end
3236

37+
"""
38+
TODO: Document function
39+
"""
3340
function inner_pairwise!(target, x, y, r)
3441
ncol = size(x, 2)
3542
@inbounds for k in axes(y, 1)
@@ -46,6 +53,9 @@ function inner_pairwise!(target, x, y, r)
4653
target
4754
end
4855

56+
"""
57+
TODO: Document function
58+
"""
4959
function pairwise!(target, x, y)
5060
ncol = size(x, 2)
5161
@inbounds for k in axes(y, 1)
@@ -62,11 +72,18 @@ function pairwise!(target, x, y)
6272
target
6373
end
6474

75+
6576
"""
6677
smart_init(X, k; init="k-means++")
6778
6879
This function handles the random initialisation of the centroids from the
6980
design matrix (X) and desired groups (k) that a user supplies.
81+
82+
The `k-means++` algorithm is used by default, with the normal random selection
83+
of centroids from X used if any other string is supplied.
84+
85+
A tuple representing the centroids, number of rows, and number of columns, respectively,
86+
is returned.
7087
"""
7188
function smart_init(X::Array{Float64, 2}, k::Int; init::String="k-means++")
7289
n_row, n_col = size(X)
@@ -121,10 +138,14 @@ function smart_init(X::Array{Float64, 2}, k::Int; init::String="k-means++")
121138
end
122139

123140

141+
124142
"""
125143
sum_of_squares(x, labels, centre, k)
126144
127-
This function computes the total sum of squares
145+
This function computes the total sum of squares based on the assigned labels (labels),
146+
design matrix (x), centroids (centre), and the number of desired groups (k).
147+
148+
A Float type representing the computed metric is returned.
128149
"""
129150
function sum_of_squares(x::Array{Float64,2}, labels::Array{Int64,1}, centre::Array)
130151
s = 0.0
@@ -138,42 +159,26 @@ function sum_of_squares(x::Array{Float64,2}, labels::Array{Int64,1}, centre::Arr
138159
return s
139160
end
140161

141-
# function sum_of_squares(x::Array{Float64,2}, labels::Array{Int64,1}, centre::Array, nth = Base.Threads.nthreads())
142-
# s = 0.0
143-
#
144-
# @inbounds for j in axes(x, 2)
145-
# for i in axes(x, 1)
146-
# s += (x[i, j] - centre[labels[i], j])^2
147-
# end
148-
# end
149-
#
150-
# return s
151-
# end
152-
#
153-
#
154-
# function inner_sum_of_squares(x::Array{Float64,2}, labels::Array{Int64,1}, centre::Array, r)
155-
# s = 0.0
156-
#
157-
# @inbounds for j in axes(x, 2)
158-
# for i in r
159-
# s += (x[i, j] - centre[labels[i], j])^2
160-
# end
161-
# end
162-
#
163-
# return s
164-
# end
165162

166163
"""
167164
kmeans(design_matrix, k; k_init="k-means++", max_iters=300, tol=1e-4, verbose=true)
168165
169-
This main function employs the K-means algorithm to cluster all examples
170-
in the training data (design_matrix) into k groups using either the
171-
`k-means++` or random initialisation.
166+
This main function employs the K-means algorithm to cluster all examples
167+
in the training data (design_matrix) into k groups using either the
168+
`k-means++` or random initialisation technique for selecting the initial
169+
centroids.
170+
171+
At the end of the number of iterations specified (max_iters), convergence is
172+
achieved if the difference between the current and last cost objective is
173+
less than the tolerance level (tol). An error is thrown if convergence fails.
174+
175+
Details of operations can be either printed or not by setting verbose accordingly.
176+
177+
A tuple representing labels, centroids, and sum_squares respectively is returned.
172178
173-
design_matrix should have the form (number of points x point dimensionality).
174179
"""
175180
function kmeans(design_matrix::Array{Float64, 2}, k::Int; k_init::String = "k-means++",
176-
max_iters::Int = 300, tol = 1e-4, verbose::Bool = true)
181+
max_iters::Int = 300, tol::Float64 = 1e-4, verbose::Bool = true)
177182

178183
centroids, n_row, n_col = smart_init(design_matrix, k, init=k_init)
179184

0 commit comments

Comments
 (0)