updated benchmark notebooks for refactored code

PyDataBlog · PyDataBlog · commit 43e1bc1ada78 · 2020-03-25T22:09:13.000+01:00
diff --git a/extras/ClusteringJL & ParallelKMeans Benchmarks.ipynb b/extras/ClusteringJL & ParallelKMeans Benchmarks.ipynb
@@ -222,11 +222,54 @@
     }
    ],
    "source": [
-    "@benchmark [ParallelKMeans.kmeans(X, i, ParallelKMeans.SingleThread(),\n",
-    "            tol=1e-4, max_iters=300, verbose=false).totalcost \n",
-    "            for i = 2:10] samples=7 seconds=300"
+    "@benchmark [ParallelKMeans.kmeans(LightElkan(), X, i, n_threads=1,\n",
+    "            tol=1e-6, max_iters=300, verbose=false).totalcost \n",
+    "            for i = 2:10] samples=7 seconds=600"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@benchmark [ParallelKMeans.kmeans(Lloyd(), X, i, n_threads=1,\n",
+    "            tol=1e-6, max_iters=300, verbose=false).totalcost \n",
+    "            for i = 2:10] samples=7 seconds=600"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "d = [ParallelKMeans.kmeans(Lloyd(), X, i, n_threads=1,\n",
+    "     tol=1e-6, max_iters=300, verbose=false).totalcost \n",
+    "     for i = 2:10]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
   {
    "cell_type": "code",
    "execution_count": 19,
@@ -253,8 +296,9 @@
     }
    ],
    "source": [
-    "b = [ParallelKMeans.kmeans(X, i, ParallelKMeans.SingleThread(),\n",
-    "        tol=1e-4, max_iters=300, verbose=false).totalcost for i = 2:10]"
+    "b = [ParallelKMeans.kmeans(LightElkan(), X, i, n_threads=1,\n",
+    "     tol=1e-6, max_iters=300, verbose=false).totalcost \n",
+    "     for i = 2:10]"
    ]
   },
   {
@@ -305,9 +349,9 @@
     }
    ],
    "source": [
-    "@benchmark [ParallelKMeans.kmeans(X, i, ParallelKMeans.MultiThread(),\n",
-    "            tol=1e-4, max_iters=300, verbose=false).totalcost \n",
-    "            for i = 2:10] samples=7 seconds=300"
+    "@benchmark [ParallelKMeans.kmeans(LightElkan(), X, i, n_threads=6,\n",
+    "            tol=1e-6, max_iters=300, verbose=false).totalcost \n",
+    "            for i = 2:10] samples=7 seconds=600"
    ]
   },
   {
@@ -336,10 +380,54 @@
     }
    ],
    "source": [
-    "c = [ParallelKMeans.kmeans(X, i, ParallelKMeans.MultiThread(), \n",
-    "        tol=1e-4, max_iters=300, verbose=false).totalcost for i = 2:10]"
+    "c = [ParallelKMeans.kmeans(LightElkan(), X, i, n_threads=6,\n",
+    "     tol=1e-6, max_iters=300, verbose=false).totalcost \n",
+    "     for i = 2:10]"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@benchmark [ParallelKMeans.kmeans(Lloyd(), X, i, n_threads=6,\n",
+    "            tol=1e-6, max_iters=300, verbose=false).totalcost \n",
+    "            for i = 2:10] samples=7 seconds=600"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "e = [ParallelKMeans.kmeans(Lloyd(), X, i, n_threads=6,\n",
+    "     tol=1e-6, max_iters=300, verbose=false).totalcost \n",
+    "     for i = 2:10]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
   {
    "cell_type": "code",
    "execution_count": 17,
@@ -504,8 +592,10 @@
    ],
    "source": [
     "plot(a, label=\"Clustering.jl\")\n",
-    "plot!(b, label=\"Single Thread\")\n",
-    "plot!(c, label=\"Multi Thread\")"
+    "plot!(b, label=\"Elkan Single Thread Implementation\")\n",
+    "plot!(c, label=\"Elkan Multi Thread Implementation\")\n",
+    "plot!(d, label=\"Lloyd Single Thread Implementation\")\n",
+    "plot!(e, label=\"Lloyd Multi Thread Implementation\")"
    ]
   },
   {
@@ -587,4 +677,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
+}
diff --git a/extras/Sklearn Benchmark.ipynb b/extras/Sklearn Benchmark.ipynb
@@ -180,34 +180,6 @@
     "X"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-03-13T08:58:58.668475Z",
-     "start_time": "2020-03-13T08:58:58.666387Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "#X_small = np.random.rand(100_000, 30)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-03-13T08:58:59.343126Z",
-     "start_time": "2020-03-13T08:58:59.341184Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "#np.savetxt('data_small.csv', X_small, delimiter=',')"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -642,4 +614,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
+}