Skip to content

Commit 87d75b8

Browse files
author
Andrey Oskin
committed
additional environment setup
1 parent 03bcbc7 commit 87d75b8

File tree

10 files changed

+163
-29
lines changed

10 files changed

+163
-29
lines changed

.github/workflows/CompatHelper.yml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
name: CompatHelper
2+
3+
on:
4+
schedule:
5+
- cron: '00 00 * * *'
6+
push:
7+
branches:
8+
- actions/trigger/CompatHelper
9+
10+
jobs:
11+
CompatHelper:
12+
runs-on: ubuntu-latest
13+
steps:
14+
- uses: julia-actions/setup-julia@latest
15+
with:
16+
version: 1.3
17+
- name: Pkg.add("CompatHelper")
18+
run: julia -e 'using Pkg; Pkg.add("CompatHelper")'
19+
- name: CompatHelper.main()
20+
env:
21+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
22+
run: >-
23+
julia -e '
24+
using CompatHelper;
25+
CompatHelper.main() do;
26+
run(`julia --project=test/environments/main -e "import Pkg; Pkg.instantiate(); Pkg.update()"`);
27+
run(`julia --project=docs -e "import Pkg; Pkg.instantiate(); Pkg.update()"`);
28+
end
29+
'

.github/workflows/TagBot.yml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
name: TagBot
2+
on:
3+
schedule:
4+
- cron: 0 * * * *
5+
jobs:
6+
TagBot:
7+
runs-on: ubuntu-latest
8+
steps:
9+
- uses: JuliaRegistries/TagBot@v1
10+
with:
11+
token: ${{ secrets.GITHUB_TOKEN }}

.github/workflows/benchmarks.yml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
name: Run benchmarks
2+
3+
on:
4+
pull_request:
5+
6+
jobs:
7+
Benchmark:
8+
runs-on: ubuntu-latest
9+
steps:
10+
- uses: actions/checkout@v2
11+
- uses: julia-actions/setup-julia@latest
12+
with:
13+
version: 1.3
14+
- name: Install dependencies
15+
run: julia -e 'using Pkg; pkg"add PkgBenchmark BenchmarkCI@0.1"'
16+
- name: Run benchmarks
17+
run: julia -e 'using PkgBenchmark, BenchmarkCI; BenchmarkCI.judge();'
18+
- name: Post results
19+
run: julia -e "using BenchmarkCI; BenchmarkCI.postjudge()"
20+
env:
21+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,5 @@
66
/dev/
77
/docs/build/
88
/docs/site/
9+
/benchmark/tune.json
10+
.benchmarkci/

Project.toml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,17 @@
11
name = "ParallelKMeans"
22
uuid = "42b8e9d4-006b-409a-8472-7f34b3fb58af"
3-
authors = ["Andrey Oskin"]
3+
authors = ["Bernard Brenyah", "Andrey Oskin"]
44
version = "0.1.0"
55

66
[deps]
7+
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
78

89
[compat]
910
julia = "1.3"
1011

1112
[extras]
13+
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
1214
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
1315

1416
[targets]
15-
test = ["Test"]
17+
test = ["Test", "Random"]

benchmark/bench01_distance.jl

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
module BenchDistance
2+
using BenchmarkTools
3+
using ParallelKMeans
4+
using Distances
5+
using Random
6+
7+
suite = BenchmarkGroup()
8+
9+
Random.seed!(2020)
10+
X = rand(100_000, 3)
11+
centroids = rand(2, 3)
12+
d = rand(100_000, 2)
13+
suite["100kx3"] = @benchmarkable ParallelKMeans.pairwise!($d, $X, $centroids)
14+
15+
X = rand(100_000, 10)
16+
centroids = rand(2, 10)
17+
d = rand(100_000, 2)
18+
suite["100kx10"] = @benchmarkable ParallelKMeans.pairwise!($d, $X, $centroids)
19+
20+
# for reference
21+
metric = SqEuclidean()
22+
suite["100kx10_distances"] = @benchmarkable Distances.pairwise!($d, $metric, $X, $centroids, dims = 1)
23+
24+
end # module
25+
26+
BenchDistance.suite

src/ParallelKMeans.jl

Lines changed: 25 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ module ParallelKMeans
33
# Based on discourse discussion
44
# https://discourse.julialang.org/t/optimization-tips-for-my-julia-code-can-i-make-it-even-faster-and-or-memory-efficient/34614/20
55

6+
using StatsBase
67
import Base.Threads: @spawn, @threads
78

89
export kmeans
@@ -137,30 +138,30 @@ function sum_of_squares(x::Array{Float64,2}, labels::Array{Int64,1}, centre::Arr
137138
return s
138139
end
139140

140-
function sum_of_squares(x::Array{Float64,2}, labels::Array{Int64,1}, centre::Array, nth = Base.Threads.nthreads())
141-
s = 0.0
142-
143-
@inbounds for j in axes(x, 2)
144-
for i in axes(x, 1)
145-
s += (x[i, j] - centre[labels[i], j])^2
146-
end
147-
end
148-
149-
return s
150-
end
151-
152-
153-
function inner_sum_of_squares(x::Array{Float64,2}, labels::Array{Int64,1}, centre::Array, r)
154-
s = 0.0
155-
156-
@inbounds for j in axes(x, 2)
157-
for i in r
158-
s += (x[i, j] - centre[labels[i], j])^2
159-
end
160-
end
161-
162-
return s
163-
end
141+
# function sum_of_squares(x::Array{Float64,2}, labels::Array{Int64,1}, centre::Array, nth = Base.Threads.nthreads())
142+
# s = 0.0
143+
#
144+
# @inbounds for j in axes(x, 2)
145+
# for i in axes(x, 1)
146+
# s += (x[i, j] - centre[labels[i], j])^2
147+
# end
148+
# end
149+
#
150+
# return s
151+
# end
152+
#
153+
#
154+
# function inner_sum_of_squares(x::Array{Float64,2}, labels::Array{Int64,1}, centre::Array, r)
155+
# s = 0.0
156+
#
157+
# @inbounds for j in axes(x, 2)
158+
# for i in r
159+
# s += (x[i, j] - centre[labels[i], j])^2
160+
# end
161+
# end
162+
#
163+
# return s
164+
# end
164165

165166
"""
166167
Kmeans(design_matrix, k; k_init="k-means++", max_iters=300, tol=1e-4, verbose=true)

test/runtests.jl

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,16 @@
1-
using ParallelKMeans
1+
module TestParallelKMeans
22
using Test
33

4-
@testset "ParallelKMeans.jl" begin
5-
# Write your own tests here.
4+
for file in sort([file for file in readdir(@__DIR__) if
5+
occursin(r"^test[_0-9]+.*\.jl$", file)])
6+
m = match(r"test[_0-9]+(.*).jl", file)
7+
8+
@testset "$(m[1])" begin
9+
# Here you can optionally exclude some test files
10+
# VERSION < v"1.1" && file == "test_xxx.jl" && continue
11+
12+
include(file)
13+
end
614
end
15+
16+
end # module

test/test01_distance.jl

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
module TestDistance
2+
using ParallelKMeans: pairwise!, pl_pairwise!
3+
using Test
4+
5+
@testset "naive singlethread pairwise" begin
6+
X = [1.0 2.0; 3.0 5.0; 4.0 6.0]
7+
y = [1.0 2.0; ]
8+
r = Array{Float64, 2}(undef, 3, 1)
9+
10+
pairwise!(r, X, y)
11+
@test all(r .≈ [0.0, 13.0, 25.0])
12+
end
13+
14+
end # module

test/test02_kmeans.jl

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
module TestKMeans
2+
using ParallelKMeans
3+
using Test
4+
using Random
5+
6+
@testset "linear separation" begin
7+
Random.seed!(2020)
8+
9+
X = rand(100, 3)
10+
labels, centroids, sum_squares = kmeans(X, 3; tol = 1e-10, verbose = false)
11+
12+
# for future reference: Clustering shows here 14.964882850452984
13+
# I guess they use a better initialisation. For now we will use our own
14+
# value
15+
@test sum_squares ≈ 15.314823028363763
16+
end
17+
18+
end # module

0 commit comments

Comments
 (0)