diff --git a/benchmark/benchmark.ipynb b/benchmark/benchmark.ipynb index cd48aaa..aacc3f3 100644 --- a/benchmark/benchmark.ipynb +++ b/benchmark/benchmark.ipynb @@ -13,19 +13,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "64d18ee8", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "c:\\Users\\tymek\\OneDrive\\Pulpit\\Studia\\Licencjat\\BlockingPy\\.venv\\Lib\\site-packages\\blocklib\\__init__.py:1: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n", - " import pkg_resources\n" - ] - } - ], + "outputs": [], "source": [ "from blocklib import generate_candidate_blocks\n", "import itertools, time, pandas as pd\n", @@ -56,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "78938c81", "metadata": {}, "outputs": [], @@ -98,9 +89,7 @@ " \"number-hash-functions\": 20,\n", " \"bf-len\": 4096, \n", " },\n", - "\n", " \"signatureSpecs\": [\n", - "\n", " [\n", " {\"type\": \"characters-at\", \"feature\": \"imie\", \"config\": {\"pos\": [0]}},\n", " {\"type\": \"characters-at\", \"feature\": \"nazwisko\", \"config\": {\"pos\": [0]}},\n", @@ -265,568 +254,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "4a5751fb", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "P-Sig: 100.0% records are covered in blocks\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 2410\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 28\n", - "\tAverage Block Size: 1.5647302904564315\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 1.653505788488471\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 6653\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 79\n", - "\tAverage Block Size: 1.35277318502931\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 1.9090505606466432\n", - "P-Sig: 100.0% records are covered in blocks\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 11986\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 282\n", - "\tAverage Block Size: 3.1626898047722345\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 7.817199968183582\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 48857\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 765\n", - "\tAverage Block Size: 1.8421106494463435\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 6.958175304535466\n", - "P-Sig: 100.0% records are covered in blocks\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 42514\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 2701\n", - "\tAverage Block Size: 8.920590864185915\n", - "\tMedian Block Size: 2\n", - "\tStandard Deviation of Block Size: 42.684873181071644\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 297692\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 8105\n", - "\tAverage Block Size: 3.023258938768929\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 28.961964569332338\n", - "[1/10] 2025-10-09T23:18:29 seed=42 — done\n", - "P-Sig: 100.0% records are covered in blocks\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 2410\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 28\n", - "\tAverage Block Size: 1.5647302904564315\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 1.653505788488471\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 6738\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 64\n", - "\tAverage Block Size: 1.335707925200356\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 2.040797657752163\n", - "P-Sig: 100.0% records are covered in blocks\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 11986\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 282\n", - "\tAverage Block Size: 3.1626898047722345\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 7.817199968183582\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 51571\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 784\n", - "\tAverage Block Size: 1.7451668573423047\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 7.709601032923595\n", - "P-Sig: 100.0% records are covered in blocks\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 42514\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 2701\n", - "\tAverage Block Size: 8.920590864185915\n", - "\tMedian Block Size: 2\n", - "\tStandard Deviation of Block Size: 42.684873181071644\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 332122\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 7551\n", - "\tAverage Block Size: 2.709847586127989\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 30.604934098595503\n", - "[2/10] 2025-10-09T23:27:30 seed=43 — done\n", - "P-Sig: 100.0% records are covered in blocks\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 2410\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 28\n", - "\tAverage Block Size: 1.5647302904564315\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 1.653505788488471\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 7432\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 33\n", - "\tAverage Block Size: 1.2109795479009688\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 1.064289509986509\n", - "P-Sig: 100.0% records are covered in blocks\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 11986\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 282\n", - "\tAverage Block Size: 3.1626898047722345\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 7.817199968183582\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 57832\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 314\n", - "\tAverage Block Size: 1.5562318439618203\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 3.7896689028788577\n", - "P-Sig: 100.0% records are covered in blocks\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 42514\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 2701\n", - "\tAverage Block Size: 8.920590864185915\n", - "\tMedian Block Size: 2\n", - "\tStandard Deviation of Block Size: 42.684873181071644\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 378414\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 3279\n", - "\tAverage Block Size: 2.3783475241402274\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 15.512863515163133\n", - "[3/10] 2025-10-09T23:36:01 seed=44 — done\n", - "P-Sig: 100.0% records are covered in blocks\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 2410\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 28\n", - "\tAverage Block Size: 1.5647302904564315\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 1.653505788488471\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 6779\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 57\n", - "\tAverage Block Size: 1.3276294438707774\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 1.7222844271069944\n", - "P-Sig: 100.0% records are covered in blocks\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 11986\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 282\n", - "\tAverage Block Size: 3.1626898047722345\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 7.817199968183582\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 50703\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 674\n", - "\tAverage Block Size: 1.7750428968700076\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 6.4330191313417755\n", - "P-Sig: 100.0% records are covered in blocks\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 42514\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 2701\n", - "\tAverage Block Size: 8.920590864185915\n", - "\tMedian Block Size: 2\n", - "\tStandard Deviation of Block Size: 42.684873181071644\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 316875\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 6628\n", - "\tAverage Block Size: 2.8402366863905324\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 26.544626476768187\n", - "[4/10] 2025-10-09T23:44:42 seed=45 — done\n", - "P-Sig: 100.0% records are covered in blocks\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 2410\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 28\n", - "\tAverage Block Size: 1.5647302904564315\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 1.653505788488471\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 7101\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 41\n", - "\tAverage Block Size: 1.267427122940431\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 1.3332770119667143\n", - "P-Sig: 100.0% records are covered in blocks\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 11986\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 282\n", - "\tAverage Block Size: 3.1626898047722345\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 7.817199968183582\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 54828\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 414\n", - "\tAverage Block Size: 1.6414970453053184\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 4.834238214971742\n", - "P-Sig: 100.0% records are covered in blocks\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 42514\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 2701\n", - "\tAverage Block Size: 8.920590864185915\n", - "\tMedian Block Size: 2\n", - "\tStandard Deviation of Block Size: 42.684873181071644\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 351414\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 3971\n", - "\tAverage Block Size: 2.5610818009527225\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 19.114605276337848\n", - "[5/10] 2025-10-09T23:53:16 seed=46 — done\n", - "P-Sig: 100.0% records are covered in blocks\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 2410\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 28\n", - "\tAverage Block Size: 1.5647302904564315\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 1.653505788488471\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 7016\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 73\n", - "\tAverage Block Size: 1.282782212086659\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 1.5778654297817563\n", - "P-Sig: 100.0% records are covered in blocks\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 11986\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 282\n", - "\tAverage Block Size: 3.1626898047722345\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 7.817199968183582\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 53041\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 679\n", - "\tAverage Block Size: 1.6968005882242039\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 5.747905951992681\n", - "P-Sig: 100.0% records are covered in blocks\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 42514\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 2701\n", - "\tAverage Block Size: 8.920590864185915\n", - "\tMedian Block Size: 2\n", - "\tStandard Deviation of Block Size: 42.684873181071644\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 331568\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 6754\n", - "\tAverage Block Size: 2.714375331756985\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 23.08521939806034\n", - "[6/10] 2025-10-10T00:01:49 seed=47 — done\n", - "P-Sig: 100.0% records are covered in blocks\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 2410\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 28\n", - "\tAverage Block Size: 1.5647302904564315\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 1.653505788488471\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 7014\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 59\n", - "\tAverage Block Size: 1.2831479897348161\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 1.5280852870741408\n", - "P-Sig: 100.0% records are covered in blocks\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 11986\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 282\n", - "\tAverage Block Size: 3.1626898047722345\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 7.817199968183582\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 54015\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 552\n", - "\tAverage Block Size: 1.6662038322688142\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 5.450313909056753\n", - "P-Sig: 100.0% records are covered in blocks\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 42514\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 2701\n", - "\tAverage Block Size: 8.920590864185915\n", - "\tMedian Block Size: 2\n", - "\tStandard Deviation of Block Size: 42.684873181071644\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 347370\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 5249\n", - "\tAverage Block Size: 2.5908973141031177\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 21.46122787833175\n", - "[7/10] 2025-10-10T00:10:23 seed=48 — done\n", - "P-Sig: 100.0% records are covered in blocks\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 2410\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 28\n", - "\tAverage Block Size: 1.5647302904564315\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 1.653505788488471\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 7163\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 49\n", - "\tAverage Block Size: 1.2564567918469915\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 1.4377082003046628\n", - "P-Sig: 100.0% records are covered in blocks\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 11986\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 282\n", - "\tAverage Block Size: 3.1626898047722345\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 7.817199968183582\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 56565\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 512\n", - "\tAverage Block Size: 1.5910898965791567\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 4.81472352710474\n", - "P-Sig: 100.0% records are covered in blocks\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 42514\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 2701\n", - "\tAverage Block Size: 8.920590864185915\n", - "\tMedian Block Size: 2\n", - "\tStandard Deviation of Block Size: 42.684873181071644\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 367319\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 5212\n", - "\tAverage Block Size: 2.4501863502840857\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 19.7780256871076\n", - "[8/10] 2025-10-10T00:18:56 seed=49 — done\n", - "P-Sig: 100.0% records are covered in blocks\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 2410\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 28\n", - "\tAverage Block Size: 1.5647302904564315\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 1.653505788488471\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 7044\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 44\n", - "\tAverage Block Size: 1.2776831345826236\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 1.3870894016610962\n", - "P-Sig: 100.0% records are covered in blocks\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 11986\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 282\n", - "\tAverage Block Size: 3.1626898047722345\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 7.817199968183582\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 52973\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 397\n", - "\tAverage Block Size: 1.6989787250108546\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 5.222591758381597\n", - "P-Sig: 100.0% records are covered in blocks\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 42514\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 2701\n", - "\tAverage Block Size: 8.920590864185915\n", - "\tMedian Block Size: 2\n", - "\tStandard Deviation of Block Size: 42.684873181071644\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 330740\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 3763\n", - "\tAverage Block Size: 2.7211707081090886\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 21.131883787876173\n", - "[9/10] 2025-10-10T00:27:28 seed=50 — done\n", - "P-Sig: 100.0% records are covered in blocks\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 2410\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 28\n", - "\tAverage Block Size: 1.5647302904564315\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 1.653505788488471\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 6917\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 77\n", - "\tAverage Block Size: 1.3011421136330779\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 1.8512388180764936\n", - "P-Sig: 100.0% records are covered in blocks\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 11986\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 282\n", - "\tAverage Block Size: 3.1626898047722345\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 7.817199968183582\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 52411\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 831\n", - "\tAverage Block Size: 1.7171967716700693\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 6.797067016423525\n", - "P-Sig: 100.0% records are covered in blocks\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 42514\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 2701\n", - "\tAverage Block Size: 8.920590864185915\n", - "\tMedian Block Size: 2\n", - "\tStandard Deviation of Block Size: 42.684873181071644\n", - "Statistics for the generated blocks:\n", - "\tNumber of Blocks: 334293\n", - "\tMinimum Block Size: 1\n", - "\tMaximum Block Size: 8883\n", - "\tAverage Block Size: 2.6922490150855687\n", - "\tMedian Block Size: 1\n", - "\tStandard Deviation of Block Size: 27.601290820720237\n", - "[10/10] 2025-10-10T00:36:07 seed=51 — done\n" - ] - }, - { - "data": { - "text/plain": [ - "( algorithm dataset_size n_runs time_sec_mean \\\n", - " 0 BlockingPy (faiss_hnsw) 1500 10 0.1635 \n", - " 1 BlockingPy (faiss_hnsw) 15000 10 12.0287 \n", - " 2 BlockingPy (faiss_hnsw) 150000 10 250.0123 \n", - " 3 BlockingPy (faiss_lsh) 1500 10 0.1846 \n", - " 4 BlockingPy (faiss_lsh) 15000 10 1.3979 \n", - " 5 BlockingPy (faiss_lsh) 150000 10 56.5441 \n", - " 6 BlockingPy (voyager) 1500 10 0.2928 \n", - " 7 BlockingPy (voyager) 15000 10 6.7772 \n", - " 8 BlockingPy (voyager) 150000 10 110.3409 \n", - " 9 BlockingPy (voyager) - fast 1500 10 0.2148 \n", - " 10 BlockingPy (voyager) - fast 15000 10 2.4950 \n", - " 11 BlockingPy (voyager) - fast 150000 10 34.3954 \n", - " 12 P-Sig 1500 10 0.0505 \n", - " 13 P-Sig 15000 10 0.3885 \n", - " 14 P-Sig 150000 10 3.4439 \n", - " 15 λ-fold LSH 1500 10 0.1932 \n", - " 16 λ-fold LSH 15000 10 2.0159 \n", - " 17 λ-fold LSH 150000 10 19.8484 \n", - " \n", - " time_sec_sd recall_mean recall_sd rr_mean rr_sd pairs_mean \\\n", - " 0 0.049185 0.959938 0.000000 0.997533 0.000000e+00 2773.0 \n", - " 1 1.681393 0.913009 0.000108 0.999726 2.678933e-07 30869.3 \n", - " 2 4.232435 0.832092 0.000507 0.999967 7.582459e-08 375155.3 \n", - " 3 0.015153 0.955470 0.008391 0.997371 1.121384e-04 2954.6 \n", - " 4 0.148485 0.899706 0.003506 0.999708 7.487838e-06 32891.3 \n", - " 5 1.314511 0.818256 0.001381 0.999964 4.497453e-07 400043.4 \n", - " 6 0.029329 0.951772 0.006036 0.997382 2.485956e-05 2942.5 \n", - " 7 0.903270 0.875978 0.004538 0.999635 7.835080e-06 41043.1 \n", - " 8 3.815316 0.715393 0.005450 0.999940 1.900283e-06 674887.8 \n", - " 9 0.015157 0.921726 0.010369 0.996613 1.811625e-04 3806.9 \n", - " 10 0.172636 0.713395 0.017146 0.999297 4.130763e-05 79066.3 \n", - " 11 0.722112 0.450416 0.016316 0.999820 9.093882e-06 2024741.4 \n", - " 12 0.010058 0.599384 0.000000 0.996124 0.000000e+00 4358.0 \n", - " 13 0.087195 0.616241 0.000000 0.996380 0.000000e+00 407185.0 \n", - " 14 0.090938 0.608723 0.000000 0.996424 0.000000e+00 40231251.0 \n", - " 15 0.009211 0.465794 0.033199 0.990865 2.847404e-03 10270.2 \n", - " 16 0.250974 0.453983 0.027622 0.991644 2.992529e-03 939938.4 \n", - " 17 0.476147 0.450190 0.026086 0.991550 3.044599e-03 95064029.1 \n", - " \n", - " pairs_sd \n", - " 0 0.000000e+00 \n", - " 1 3.004090e+01 \n", - " 2 8.529769e+02 \n", - " 3 1.260443e+02 \n", - " 4 8.424157e+02 \n", - " 5 5.059325e+03 \n", - " 6 2.812373e+01 \n", - " 7 8.814900e+02 \n", - " 8 2.137802e+04 \n", - " 9 2.038401e+02 \n", - " 10 4.646664e+03 \n", - " 11 1.023054e+05 \n", - " 12 0.000000e+00 \n", - " 13 0.000000e+00 \n", - " 14 0.000000e+00 \n", - " 15 3.201193e+03 \n", - " 16 3.366370e+05 \n", - " 17 3.425151e+07 ,\n", - " run algorithm dataset_size time_sec recall \\\n", - " 170 51 BlockingPy (voyager) 15000 6.382 0.881361 \n", - " 171 51 BlockingPy (faiss_hnsw) 15000 11.345 0.913070 \n", - " 172 51 BlockingPy (faiss_lsh) 15000 1.352 0.900851 \n", - " 173 51 BlockingPy (voyager) - fast 15000 2.391 0.720186 \n", - " 174 51 P-Sig 150000 3.409 0.608723 \n", - " 175 51 λ-fold LSH 150000 19.775 0.482812 \n", - " 176 51 BlockingPy (voyager) 150000 108.079 0.718584 \n", - " 177 51 BlockingPy (faiss_hnsw) 150000 247.157 0.831695 \n", - " 178 51 BlockingPy (faiss_lsh) 150000 56.089 0.816388 \n", - " 179 51 BlockingPy (voyager) - fast 150000 34.015 0.437246 \n", - " \n", - " reduction_ratio pairs \n", - " 170 0.999636 40982 \n", - " 171 0.999725 30910 \n", - " 172 0.999706 33109 \n", - " 173 0.999322 76307 \n", - " 174 0.996424 40231251 \n", - " 175 0.988613 128098559 \n", - " 176 0.999941 665049 \n", - " 177 0.999967 375716 \n", - " 178 0.999964 400416 \n", - " 179 0.999816 2069035 )" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "N_RUNS = 10\n", "\n",