diff --git a/data/benchmarks/results_cuvs_26_04.csv b/data/benchmarks/results_cuvs_26_04.csv new file mode 100644 index 0000000000..3f5a52c1fc --- /dev/null +++ b/data/benchmarks/results_cuvs_26_04.csv @@ -0,0 +1,130 @@ +SKU Hardware Type GPU CPU dtype cuVS Algo Mode Recall Range Total Vectors Dimensions Index Build Time (s) Search Batch Size TopK Mean Search Throughput (QPS) Mean Search Latency (ms) Mean Recall N Points in Bucket +8480_plus CPU NA 2x Intel 8480+, 224 Threads, HT=On, Sapphire Rapids FP32 hnswlib throughput [90-95%) 1000000 1024 107 1 10 41744 7.184 0.93 7 +8480_plus CPU NA 2x Intel 8480+, 224 Threads, HT=On, Sapphire Rapids FP32 hnswlib throughput [95-99%) 1000000 1024 110 1 10 20724 15.658 0.9694 8 +8480_plus CPU NA 2x Intel 8480+, 224 Threads, HT=On, Sapphire Rapids FP32 hnswlib throughput >=99% 1000000 1024 169 1 10 3507 51.33 0.9954 15 +8480_plus CPU NA 2x Intel 8480+, 224 Threads, HT=On, Sapphire Rapids FP32 hnswlib throughput [90-95%) 1000000 1024 107 1 1000 2890 39.745 0.9226 7 +8480_plus CPU NA 2x Intel 8480+, 224 Threads, HT=On, Sapphire Rapids FP32 hnswlib throughput [95-99%) 1000000 1024 195 1 1000 1366 50.766 0.9569 5 +8480_plus CPU NA 2x Intel 8480+, 224 Threads, HT=On, Sapphire Rapids FP32 hnswlib throughput [90-95%) 1000000 1024 106 10 10 44388 66.702 0.9278 7 +8480_plus CPU NA 2x Intel 8480+, 224 Threads, HT=On, Sapphire Rapids FP32 hnswlib throughput [95-99%) 1000000 1024 99.87 10 10 17214 198.402 0.9761 10 +8480_plus CPU NA 2x Intel 8480+, 224 Threads, HT=On, Sapphire Rapids FP32 hnswlib throughput >=99% 1000000 1024 169.81 10 10 3360 395.769 0.9955 14 +8480_plus CPU NA 2x Intel 8480+, 224 Threads, HT=On, Sapphire Rapids FP32 hnswlib throughput [90-95%) 1000000 1024 99.2 10 1000 2720 401.52 0.9283 9 +8480_plus CPU NA 2x Intel 8480+, 224 Threads, HT=On, Sapphire Rapids FP32 hnswlib throughput [95-99%) 1000000 1024 194.66 10 1000 1543 498.517 0.9572 3 +A100-SXM4-80GB GPU A100-SXM4-80GB 2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome 
FP32 cuvs_cagra throughput [90-95%) 1000000 1024 14.11 1 10 34073 7.821 0.9289 4 +A100-SXM4-80GB GPU A100-SXM4-80GB 2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome FP32 cuvs_cagra throughput [95-99%) 1000000 1024 14.63 1 10 29745 9.03 0.9806 10 +A100-SXM4-80GB GPU A100-SXM4-80GB 2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome FP32 cuvs_cagra throughput >=99% 1000000 1024 16.66 1 10 11589 10.219 0.997 43 +A100-SXM4-80GB GPU A100-SXM4-80GB 2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome FP32 cuvs_cagra throughput [90-95%) 1000000 1024 16.43 10 10 189221 9.328 0.9291 2 +A100-SXM4-80GB GPU A100-SXM4-80GB 2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome FP32 cuvs_cagra throughput [95-99%) 1000000 1024 16.93 10 10 98065 9.474 0.9805 20 +A100-SXM4-80GB GPU A100-SXM4-80GB 2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome FP32 cuvs_cagra throughput >=99% 1000000 1024 16.94 10 10 39666 102.733 0.997 21 +A100-SXM4-80GB GPU A100-SXM4-80GB 2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome FP32 cuvs_cagra_hnswlib throughput [90-95%) 1000000 1024 22.97 1 10 1418 5.262 0.9129 7 +A100-SXM4-80GB GPU A100-SXM4-80GB 2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome FP32 cuvs_cagra_hnswlib throughput [95-99%) 1000000 1024 24.9 1 10 624 7.651 0.9776 9 +A100-SXM4-80GB GPU A100-SXM4-80GB 2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome FP32 cuvs_cagra_hnswlib throughput >=99% 1000000 1024 23.83 1 10 269 973.856 0.9981 4 +A100-SXM4-80GB GPU A100-SXM4-80GB 2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome FP32 cuvs_cagra_hnswlib throughput [90-95%) 1000000 1024 39.24 10 10 4324 2.818 0.9283 9 +A100-SXM4-80GB GPU A100-SXM4-80GB 2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome FP32 cuvs_cagra_hnswlib throughput [95-99%) 1000000 1024 26.28 10 10 2334 1368.22 0.9762 20 +A100-SXM4-80GB GPU A100-SXM4-80GB 2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome FP32 cuvs_cagra_hnswlib throughput >=99% 1000000 1024 27.24 10 10 954 1878.792 0.997 21 +A10G GPU A10G 2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan FP32 
cuvs_cagra throughput [90-95%) 1000000 1024 19.25 1 10 27700 5.851 0.9306 4 +A10G GPU A10G 2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan FP32 cuvs_cagra throughput [95-99%) 1000000 1024 18.94 1 10 21962 6.799 0.9816 12 +A10G GPU A10G 2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan FP32 cuvs_cagra throughput >=99% 1000000 1024 21.85 1 10 7613 9.201 0.9967 31 +A10G GPU A10G 2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan FP32 cuvs_cagra throughput [90-95%) 1000000 1024 18.67 10 10 103569 6.504 0.9284 7 +A10G GPU A10G 2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan FP32 cuvs_cagra throughput [95-99%) 1000000 1024 20.23 10 10 59180 9.946 0.9835 7 +A10G GPU A10G 2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan FP32 cuvs_cagra throughput >=99% 1000000 1024 21.01 10 10 14266 71.157 0.9967 24 +A10G GPU A10G 2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan FP32 cuvs_cagra_hnswlib throughput [90-95%) 1000000 1024 45.33 1 10 1313 3.499 0.9297 7 +A10G GPU A10G 2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan FP32 cuvs_cagra_hnswlib throughput [95-99%) 1000000 1024 26.85 1 10 960 3.308 0.9646 2 +A10G GPU A10G 2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan FP32 cuvs_cagra_hnswlib throughput >=99% 1000000 1024 27.54 1 10 320 356.949 0.996 7 +A10G GPU A10G 2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan FP32 cuvs_cagra_hnswlib throughput [90-95%) 1000000 1024 29.83 10 10 5633 6.365 0.9123 7 +A10G GPU A10G 2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan FP32 cuvs_cagra_hnswlib throughput [95-99%) 1000000 1024 27.55 10 10 2540 362.402 0.9744 17 +A10G GPU A10G 2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan FP32 cuvs_cagra_hnswlib throughput >=99% 1000000 1024 29.89 10 10 841 1737.822 0.9982 10 +Genoa_9654 CPU NA 2x AMD EPYC 9654, 384 Threads, HT=On, Zen4 Genoa FP32 hnswlib throughput [90-95%) 1000000 1024 89.29 1 10 27348 13.872 0.9302 9 +Genoa_9654 CPU NA 2x AMD EPYC 9654, 384 Threads, HT=On, Zen4 Genoa FP32 hnswlib throughput [95-99%) 1000000 1024 129.23 1 10 
11269 42.858 0.9719 13 +Genoa_9654 CPU NA 2x AMD EPYC 9654, 384 Threads, HT=On, Zen4 Genoa FP32 hnswlib throughput >=99% 1000000 1024 185.3 1 10 2403 94.376 0.9954 21 +Genoa_9654 CPU NA 2x AMD EPYC 9654, 384 Threads, HT=On, Zen4 Genoa FP32 hnswlib throughput [90-95%) 1000000 1024 114.22 1 1000 2436 176.568 0.9413 3 +Genoa_9654 CPU NA 2x AMD EPYC 9654, 384 Threads, HT=On, Zen4 Genoa FP32 hnswlib throughput [95-99%) 1000000 1024 224.1 1 1000 1572 203.731 0.9563 4 +Genoa_9654 CPU NA 2x AMD EPYC 9654, 384 Threads, HT=On, Zen4 Genoa FP32 hnswlib throughput [90-95%) 1000000 1024 76.06 10 10 35837 123.799 0.9209 5 +Genoa_9654 CPU NA 2x AMD EPYC 9654, 384 Threads, HT=On, Zen4 Genoa FP32 hnswlib throughput [95-99%) 1000000 1024 98.4 10 10 10703 481.908 0.9795 11 +Genoa_9654 CPU NA 2x AMD EPYC 9654, 384 Threads, HT=On, Zen4 Genoa FP32 hnswlib throughput >=99% 1000000 1024 195.09 10 10 2410 751.321 0.9963 16 +Genoa_9654 CPU NA 2x AMD EPYC 9654, 384 Threads, HT=On, Zen4 Genoa FP32 hnswlib throughput [90-95%) 1000000 1024 135.66 10 1000 2352 1284.847 0.9424 3 +Genoa_9654 CPU NA 2x AMD EPYC 9654, 384 Threads, HT=On, Zen4 Genoa FP32 hnswlib throughput [95-99%) 1000000 1024 223.32 10 1000 1233 1324.779 0.9561 6 +H100-SXM-80GB GPU H100-SXM-80GB 2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids FP32 cuvs_cagra throughput [90-95%) 1000000 1024 10.86 1 10 38983 2.844 0.9131 1 +H100-SXM-80GB GPU H100-SXM-80GB 2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids FP32 cuvs_cagra throughput [95-99%) 1000000 1024 10.17 1 10 35843 7.846 0.9709 4 +H100-SXM-80GB GPU H100-SXM-80GB 2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids FP32 cuvs_cagra throughput >=99% 1000000 1024 10.74 1 10 15645 12.862 0.9974 42 +H100-SXM-80GB GPU H100-SXM-80GB 2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids FP32 cuvs_cagra throughput [90-95%) 1000000 1024 8.96 10 10 298758 13.914 0.9284 7 +H100-SXM-80GB GPU H100-SXM-80GB 2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids FP32 cuvs_cagra throughput 
[95-99%) 1000000 1024 10.15 10 10 189566 13.806 0.9774 10 +H100-SXM-80GB GPU H100-SXM-80GB 2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids FP32 cuvs_cagra throughput >=99% 1000000 1024 10.25 10 10 62458 114.422 0.9958 19 +H100-SXM-80GB GPU H100-SXM-80GB 2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids FP32 cuvs_cagra_hnswlib throughput [90-95%) 1000000 1024 15.16 1 10 1144 0.902 0.9311 9 +H100-SXM-80GB GPU H100-SXM-80GB 2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids FP32 cuvs_cagra_hnswlib throughput [95-99%) 1000000 1024 14.14 1 10 578 2.247 0.9747 19 +H100-SXM-80GB GPU H100-SXM-80GB 2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids FP32 cuvs_cagra_hnswlib throughput >=99% 1000000 1024 14.57 1 10 188 259.522 0.9966 7 +H100-SXM-80GB GPU H100-SXM-80GB 2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids FP32 cuvs_cagra_hnswlib throughput [90-95%) 1000000 1024 15.2 10 10 7895 1.332 0.9236 8 +H100-SXM-80GB GPU H100-SXM-80GB 2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids FP32 cuvs_cagra_hnswlib throughput [95-99%) 1000000 1024 14.26 10 10 3856 2.898 0.9708 16 +H100-SXM-80GB GPU H100-SXM-80GB 2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids FP32 cuvs_cagra_hnswlib throughput >=99% 1000000 1024 14.88 10 10 978 1493.781 0.997 31 +H100-SXM-80GB GPU H100-SXM-80GB 2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids FP32 cuvs_vamana throughput [90-95%) 1000000 1024 64.97 1 10 1312 18.573 0.923 19 +H100-SXM-80GB GPU H100-SXM-80GB 2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids FP32 cuvs_vamana throughput [95-99%) 1000000 1024 76.69 1 10 711 158.355 0.9724 16 +H100-SXM-80GB GPU H100-SXM-80GB 2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids FP32 cuvs_vamana throughput >=99% 1000000 1024 101.42 1 10 316 648.719 0.9955 5 +H100-SXM-80GB GPU H100-SXM-80GB 2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids FP32 cuvs_vamana throughput [90-95%) 1000000 1024 64.92 10 10 1310 165.032 0.9273 12 +H100-SXM-80GB GPU H100-SXM-80GB 2x Intel 8480CL, 224 
Threads, HT=On, Sapphire Rapids FP32 cuvs_vamana throughput [95-99%) 1000000 1024 70.42 10 10 657 484.856 0.9762 16 +H100-SXM-80GB GPU H100-SXM-80GB 2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids FP32 cuvs_vamana throughput >=99% 1000000 1024 102.77 10 10 294 898.319 0.9958 6 +L4 GPU L4 2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan FP32 cuvs_cagra throughput [90-95%) 1000000 1024 23.73 1 10 30133 2.684 0.9317 4 +L4 GPU L4 2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan FP32 cuvs_cagra throughput [95-99%) 1000000 1024 25.09 1 10 19020 6.492 0.9788 18 +L4 GPU L4 2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan FP32 cuvs_cagra throughput >=99% 1000000 1024 26.56 1 10 5911 8.01 0.9964 40 +L4 GPU L4 2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan FP32 cuvs_cagra throughput [90-95%) 1000000 1024 21.82 10 10 74020 13.567 0.9272 2 +L4 GPU L4 2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan FP32 cuvs_cagra throughput [95-99%) 1000000 1024 25.78 10 10 35277 21.027 0.984 11 +L4 GPU L4 2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan FP32 cuvs_cagra throughput >=99% 1000000 1024 26.88 10 10 6923 139.686 0.9969 25 +L4 GPU L4 2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan FP32 cuvs_cagra_hnswlib throughput [90-95%) 1000000 1024 44.67 1 10 1301 2.316 0.9309 4 +L4 GPU L4 2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan FP32 cuvs_cagra_hnswlib throughput [95-99%) 1000000 1024 36.47 1 10 629 7.378 0.9784 8 +L4 GPU L4 2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan FP32 cuvs_cagra_hnswlib throughput >=99% 1000000 1024 34.82 1 10 280 318.926 0.9953 6 +L4 GPU L4 2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan FP32 cuvs_cagra_hnswlib throughput [90-95%) 1000000 1024 38.16 10 10 5517 2.349 0.9253 7 +L4 GPU L4 2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan FP32 cuvs_cagra_hnswlib throughput [95-99%) 1000000 1024 34.19 10 10 2523 269.387 0.9768 16 +L4 GPU L4 2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan FP32 cuvs_cagra_hnswlib throughput >=99% 1000000 1024 
37.04 10 10 923 1632.779 0.9971 23 +L40S GPU L40S 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra throughput [95-99%) 1000000 1024 10.05 1 10 38249 6.546 0.9804 9 +L40S GPU L40S 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra throughput >=99% 1000000 1024 10.95 1 10 11870 25.83 0.9971 44 +L40S GPU L40S 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra throughput [90-95%) 1000000 1024 9.43 10 10 178441 18.652 0.9274 4 +L40S GPU L40S 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra throughput [95-99%) 1000000 1024 10.34 10 10 72111 22.845 0.9798 16 +L40S GPU L40S 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra throughput >=99% 1000000 1024 11.39 10 10 22046 753.079 0.9978 16 +L40S GPU L40S 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra_hnswlib throughput [90-95%) 1000000 1024 13.67 1 10 1853 1.218 0.9268 3 +L40S GPU L40S 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra_hnswlib throughput [95-99%) 1000000 1024 13.63 1 10 671 2.822 0.973 17 +L40S GPU L40S 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra_hnswlib throughput >=99% 1000000 1024 13.7 1 10 226 412.092 0.9974 8 +L40S GPU L40S 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra_hnswlib throughput [90-95%) 1000000 1024 13.45 10 10 8547 1.346 0.9306 8 +L40S GPU L40S 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra_hnswlib throughput [95-99%) 1000000 1024 14.3 10 10 3444 3.507 0.9791 19 +L40S GPU L40S 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra_hnswlib throughput >=99% 1000000 1024 15.92 10 10 1139 1455.341 0.9972 18 +L40S GPU L40S 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_vamana throughput [90-95%) 1000000 1024 114.07 1 10 1464 25.114 0.9267 12 +L40S GPU L40S 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_vamana throughput [95-99%) 1000000 1024 179.91 1 10 759 136.958 0.9756 17 +L40S GPU L40S 
2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_vamana throughput >=99% 1000000 1024 219.95 1 10 352 462.686 0.9947 10 +L40S GPU L40S 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_vamana throughput [90-95%) 1000000 1024 157.68 10 10 1496 45.385 0.9265 13 +L40S GPU L40S 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_vamana throughput [95-99%) 1000000 1024 154.35 10 10 770 454.886 0.9757 13 +L40S GPU L40S 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_vamana throughput >=99% 1000000 1024 258.2 10 10 323 1069.649 0.9956 6 +RTX_PRO_4500_BSE GPU RTX PRO 4500 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra throughput [95-99%) 1000000 1024 13.28 1 10 34776 6.423 0.9855 8 +RTX_PRO_4500_BSE GPU RTX PRO 4500 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra throughput >=99% 1000000 1024 13.96 1 10 11716 12.764 0.9972 37 +RTX_PRO_4500_BSE GPU RTX PRO 4500 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra throughput [90-95%) 1000000 1024 12.88 10 10 176395 10.523 0.93 5 +RTX_PRO_4500_BSE GPU RTX PRO 4500 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra throughput [95-99%) 1000000 1024 13.88 10 10 84601 15.826 0.9747 15 +RTX_PRO_4500_BSE GPU RTX PRO 4500 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra throughput >=99% 1000000 1024 14.46 10 10 26068 897.21 0.9977 16 +RTX_PRO_4500_BSE GPU RTX PRO 4500 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra_hnswlib throughput [90-95%) 1000000 1024 16.97 1 10 1277 0.941 0.9311 5 +RTX_PRO_4500_BSE GPU RTX PRO 4500 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra_hnswlib throughput [95-99%) 1000000 1024 16.43 1 10 567 2.094 0.9757 18 +RTX_PRO_4500_BSE GPU RTX PRO 4500 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra_hnswlib throughput >=99% 1000000 1024 17.75 1 10 211 277.42 
0.9957 8 +RTX_PRO_4500_BSE GPU RTX PRO 4500 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra_hnswlib throughput [90-95%) 1000000 1024 16.47 10 10 8630 1.192 0.9216 6 +RTX_PRO_4500_BSE GPU RTX PRO 4500 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra_hnswlib throughput [95-99%) 1000000 1024 17.1 10 10 4009 2.832 0.9721 19 +RTX_PRO_4500_BSE GPU RTX PRO 4500 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra_hnswlib throughput >=99% 1000000 1024 18.16 10 10 1114 562.567 0.9966 23 +RTX_PRO_4500_BSE GPU RTX PRO 4500 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_vamana throughput [90-95%) 1000000 1024 159.31 1 10 1502 23.332 0.9233 14 +RTX_PRO_4500_BSE GPU RTX PRO 4500 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_vamana throughput [95-99%) 1000000 1024 205.73 1 10 758 91.43 0.9767 11 +RTX_PRO_4500_BSE GPU RTX PRO 4500 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_vamana throughput >=99% 1000000 1024 302.47 1 10 323 566.174 0.9972 6 +RTX_PRO_4500_BSE GPU RTX PRO 4500 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_vamana throughput [90-95%) 1000000 1024 151.36 10 10 1537 41.827 0.923 11 +RTX_PRO_4500_BSE GPU RTX PRO 4500 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_vamana throughput [95-99%) 1000000 1024 196.79 10 10 832 432.442 0.9729 15 +RTX_PRO_4500_BSE GPU RTX PRO 4500 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_vamana throughput >=99% 1000000 1024 235.94 10 10 347 756.032 0.9942 9 +RTX_PRO_6000_BSE GPU RTX PRO 6000 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra throughput [90-95%) 1000000 1024 7.35 1 10 40788 7.513 0.9323 1 +RTX_PRO_6000_BSE GPU RTX PRO 6000 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra throughput [95-99%) 1000000 1024 7.35 1 10 39554 5.816 0.9823 3 +RTX_PRO_6000_BSE GPU 
RTX PRO 6000 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra throughput >=99% 1000000 1024 8.6 1 10 15196 13.229 0.9976 32 +RTX_PRO_6000_BSE GPU RTX PRO 6000 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra throughput [90-95%) 1000000 1024 9.15 1 1000 19355 5.054 0.906 7 +RTX_PRO_6000_BSE GPU RTX PRO 6000 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra throughput [95-99%) 1000000 1024 8.79 1 1000 9418 24.15 0.9823 10 +RTX_PRO_6000_BSE GPU RTX PRO 6000 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra throughput >=99% 1000000 1024 9.32 1 1000 3980 17.687 0.9936 6 +RTX_PRO_6000_BSE GPU RTX PRO 6000 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra throughput [90-95%) 1000000 1024 8.04 10 10 313550 14.533 0.9291 4 +RTX_PRO_6000_BSE GPU RTX PRO 6000 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra throughput [95-99%) 1000000 1024 8.28 10 10 129726 14.584 0.9807 18 +RTX_PRO_6000_BSE GPU RTX PRO 6000 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra throughput >=99% 1000000 1024 8.57 10 10 52490 127.814 0.9949 12 +RTX_PRO_6000_BSE GPU RTX PRO 6000 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra_hnswlib throughput [90-95%) 1000000 1024 10.49 1 10 1958 1.035 0.9321 1 +RTX_PRO_6000_BSE GPU RTX PRO 6000 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra_hnswlib throughput [95-99%) 1000000 1024 11.56 1 10 936 2.506 0.9696 8 +RTX_PRO_6000_BSE GPU RTX PRO 6000 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra_hnswlib throughput >=99% 1000000 1024 11.78 1 10 226 153.373 0.997 9 +RTX_PRO_6000_BSE GPU RTX PRO 6000 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra_hnswlib throughput [90-95%) 1000000 1024 11.84 10 10 8911 1.152 0.9205 5 +RTX_PRO_6000_BSE GPU RTX PRO 6000 Blackwell 2x Intel 8570, 
224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra_hnswlib throughput [95-99%) 1000000 1024 12.82 10 10 3508 3.423 0.9774 17 +RTX_PRO_6000_BSE GPU RTX PRO 6000 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_cagra_hnswlib throughput >=99% 1000000 1024 12.9 10 10 1178 1561.954 0.9971 28 +RTX_PRO_6000_BSE GPU RTX PRO 6000 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_vamana throughput [90-95%) 1000000 1024 90.79 1 10 1496 17.152 0.9239 10 +RTX_PRO_6000_BSE GPU RTX PRO 6000 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_vamana throughput [95-99%) 1000000 1024 102.79 1 10 793 109.882 0.9752 15 +RTX_PRO_6000_BSE GPU RTX PRO 6000 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_vamana throughput >=99% 1000000 1024 101.19 1 10 359 269.405 0.9938 9 +RTX_PRO_6000_BSE GPU RTX PRO 6000 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_vamana throughput [90-95%) 1000000 1024 96.62 10 10 1554 103.732 0.9244 9 +RTX_PRO_6000_BSE GPU RTX PRO 6000 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_vamana throughput [95-99%) 1000000 1024 84.54 10 10 841 254.994 0.9728 13 +RTX_PRO_6000_BSE GPU RTX PRO 6000 Blackwell 2x Intel 8570, 224 Threads, HT=On, Emerald Rapids FP32 cuvs_vamana throughput >=99% 1000000 1024 119.69 10 10 343 782.166 0.995 8 \ No newline at end of file diff --git a/fern/assets/data/benchmark_results.json b/fern/assets/data/benchmark_results.json new file mode 100644 index 0000000000..406830f72c --- /dev/null +++ b/fern/assets/data/benchmark_results.json @@ -0,0 +1,2453 @@ +[ + { + "SKU": "8480_plus", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x Intel 8480+, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 107, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search 
Throughput (QPS)": 41744, + "Mean Search Latency (ms)": 7.184, + "Mean Recall": 0.93, + "N Points in Bucket": 7 + }, + { + "SKU": "8480_plus", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x Intel 8480+, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 110, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 20724, + "Mean Search Latency (ms)": 15.658, + "Mean Recall": 0.9694, + "N Points in Bucket": 8 + }, + { + "SKU": "8480_plus", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x Intel 8480+, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 169, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 3507, + "Mean Search Latency (ms)": 51.33, + "Mean Recall": 0.9954, + "N Points in Bucket": 15 + }, + { + "SKU": "8480_plus", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x Intel 8480+, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 107, + "Search Batch Size": 1, + "TopK": 1000, + "Mean Search Throughput (QPS)": 2890, + "Mean Search Latency (ms)": 39.745, + "Mean Recall": 0.9226, + "N Points in Bucket": 7 + }, + { + "SKU": "8480_plus", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x Intel 8480+, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 195, + "Search Batch Size": 1, + "TopK": 1000, + "Mean Search Throughput (QPS)": 1366, + "Mean Search Latency (ms)": 
50.766, + "Mean Recall": 0.9569, + "N Points in Bucket": 5 + }, + { + "SKU": "8480_plus", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x Intel 8480+, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 106, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 44388, + "Mean Search Latency (ms)": 66.702, + "Mean Recall": 0.9278, + "N Points in Bucket": 7 + }, + { + "SKU": "8480_plus", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x Intel 8480+, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 99.87, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 17214, + "Mean Search Latency (ms)": 198.402, + "Mean Recall": 0.9761, + "N Points in Bucket": 10 + }, + { + "SKU": "8480_plus", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x Intel 8480+, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 169.81, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 3360, + "Mean Search Latency (ms)": 395.769, + "Mean Recall": 0.9955, + "N Points in Bucket": 14 + }, + { + "SKU": "8480_plus", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x Intel 8480+, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 99.2, + "Search Batch Size": 10, + "TopK": 1000, + "Mean Search Throughput (QPS)": 2720, + "Mean Search Latency (ms)": 401.52, + "Mean Recall": 0.9283, + "N Points 
in Bucket": 9 + }, + { + "SKU": "8480_plus", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x Intel 8480+, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 194.66, + "Search Batch Size": 10, + "TopK": 1000, + "Mean Search Throughput (QPS)": 1543, + "Mean Search Latency (ms)": 498.517, + "Mean Recall": 0.9572, + "N Points in Bucket": 3 + }, + { + "SKU": "A100-SXM4-80GB", + "Hardware Type": "GPU", + "GPU": "A100-SXM4-80GB", + "CPU": "2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 14.11, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 34073, + "Mean Search Latency (ms)": 7.821, + "Mean Recall": 0.9289, + "N Points in Bucket": 4 + }, + { + "SKU": "A100-SXM4-80GB", + "Hardware Type": "GPU", + "GPU": "A100-SXM4-80GB", + "CPU": "2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 14.63, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 29745, + "Mean Search Latency (ms)": 9.03, + "Mean Recall": 0.9806, + "N Points in Bucket": 10 + }, + { + "SKU": "A100-SXM4-80GB", + "Hardware Type": "GPU", + "GPU": "A100-SXM4-80GB", + "CPU": "2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 16.66, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 11589, + "Mean Search Latency (ms)": 10.219, + "Mean Recall": 0.997, + "N Points 
in Bucket": 43 + }, + { + "SKU": "A100-SXM4-80GB", + "Hardware Type": "GPU", + "GPU": "A100-SXM4-80GB", + "CPU": "2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 16.43, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 189221, + "Mean Search Latency (ms)": 9.328, + "Mean Recall": 0.9291, + "N Points in Bucket": 2 + }, + { + "SKU": "A100-SXM4-80GB", + "Hardware Type": "GPU", + "GPU": "A100-SXM4-80GB", + "CPU": "2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 16.93, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 98065, + "Mean Search Latency (ms)": 9.474, + "Mean Recall": 0.9805, + "N Points in Bucket": 20 + }, + { + "SKU": "A100-SXM4-80GB", + "Hardware Type": "GPU", + "GPU": "A100-SXM4-80GB", + "CPU": "2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 16.94, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 39666, + "Mean Search Latency (ms)": 102.733, + "Mean Recall": 0.997, + "N Points in Bucket": 21 + }, + { + "SKU": "A100-SXM4-80GB", + "Hardware Type": "GPU", + "GPU": "A100-SXM4-80GB", + "CPU": "2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 22.97, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 1418, + "Mean Search Latency (ms)": 5.262, + "Mean 
Recall": 0.9129, + "N Points in Bucket": 7 + }, + { + "SKU": "A100-SXM4-80GB", + "Hardware Type": "GPU", + "GPU": "A100-SXM4-80GB", + "CPU": "2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 24.9, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 624, + "Mean Search Latency (ms)": 7.651, + "Mean Recall": 0.9776, + "N Points in Bucket": 9 + }, + { + "SKU": "A100-SXM4-80GB", + "Hardware Type": "GPU", + "GPU": "A100-SXM4-80GB", + "CPU": "2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 23.83, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 269, + "Mean Search Latency (ms)": 973.856, + "Mean Recall": 0.9981, + "N Points in Bucket": 4 + }, + { + "SKU": "A100-SXM4-80GB", + "Hardware Type": "GPU", + "GPU": "A100-SXM4-80GB", + "CPU": "2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 39.24, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 4324, + "Mean Search Latency (ms)": 2.818, + "Mean Recall": 0.9283, + "N Points in Bucket": 9 + }, + { + "SKU": "A100-SXM4-80GB", + "Hardware Type": "GPU", + "GPU": "A100-SXM4-80GB", + "CPU": "2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 26.28, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 2334, + 
"Mean Search Latency (ms)": 1368.22, + "Mean Recall": 0.9762, + "N Points in Bucket": 20 + }, + { + "SKU": "A100-SXM4-80GB", + "Hardware Type": "GPU", + "GPU": "A100-SXM4-80GB", + "CPU": "2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 27.24, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 954, + "Mean Search Latency (ms)": 1878.792, + "Mean Recall": 0.997, + "N Points in Bucket": 21 + }, + { + "SKU": "A10G", + "Hardware Type": "GPU", + "GPU": "A10G", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 19.25, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 27700, + "Mean Search Latency (ms)": 5.851, + "Mean Recall": 0.9306, + "N Points in Bucket": 4 + }, + { + "SKU": "A10G", + "Hardware Type": "GPU", + "GPU": "A10G", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 18.94, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 21962, + "Mean Search Latency (ms)": 6.799, + "Mean Recall": 0.9816, + "N Points in Bucket": 12 + }, + { + "SKU": "A10G", + "Hardware Type": "GPU", + "GPU": "A10G", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 21.85, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 7613, + "Mean Search Latency (ms)": 9.201, 
+ "Mean Recall": 0.9967, + "N Points in Bucket": 31 + }, + { + "SKU": "A10G", + "Hardware Type": "GPU", + "GPU": "A10G", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 18.67, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 103569, + "Mean Search Latency (ms)": 6.504, + "Mean Recall": 0.9284, + "N Points in Bucket": 7 + }, + { + "SKU": "A10G", + "Hardware Type": "GPU", + "GPU": "A10G", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 20.23, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 59180, + "Mean Search Latency (ms)": 9.946, + "Mean Recall": 0.9835, + "N Points in Bucket": 7 + }, + { + "SKU": "A10G", + "Hardware Type": "GPU", + "GPU": "A10G", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 21.01, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 14266, + "Mean Search Latency (ms)": 71.157, + "Mean Recall": 0.9967, + "N Points in Bucket": 24 + }, + { + "SKU": "A10G", + "Hardware Type": "GPU", + "GPU": "A10G", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 45.33, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 1313, + "Mean Search Latency (ms)": 3.499, + "Mean Recall": 0.9297, + "N Points in Bucket": 
7 + }, + { + "SKU": "A10G", + "Hardware Type": "GPU", + "GPU": "A10G", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 26.85, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 960, + "Mean Search Latency (ms)": 3.308, + "Mean Recall": 0.9646, + "N Points in Bucket": 2 + }, + { + "SKU": "A10G", + "Hardware Type": "GPU", + "GPU": "A10G", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 27.54, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 320, + "Mean Search Latency (ms)": 356.949, + "Mean Recall": 0.996, + "N Points in Bucket": 7 + }, + { + "SKU": "A10G", + "Hardware Type": "GPU", + "GPU": "A10G", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 29.83, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 5633, + "Mean Search Latency (ms)": 6.365, + "Mean Recall": 0.9123, + "N Points in Bucket": 7 + }, + { + "SKU": "A10G", + "Hardware Type": "GPU", + "GPU": "A10G", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 27.55, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 2540, + "Mean Search Latency (ms)": 362.402, + "Mean Recall": 0.9744, + "N Points in Bucket": 17 + }, + { + "SKU": "A10G", + 
"Hardware Type": "GPU", + "GPU": "A10G", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 29.89, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 841, + "Mean Search Latency (ms)": 1737.822, + "Mean Recall": 0.9982, + "N Points in Bucket": 10 + }, + { + "SKU": "Genoa_9654", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x AMD EPYC 9654, 384 Threads, HT=On, Zen4 Genoa", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 89.29, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 27348, + "Mean Search Latency (ms)": 13.872, + "Mean Recall": 0.9302, + "N Points in Bucket": 9 + }, + { + "SKU": "Genoa_9654", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x AMD EPYC 9654, 384 Threads, HT=On, Zen4 Genoa", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 129.23, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 11269, + "Mean Search Latency (ms)": 42.858, + "Mean Recall": 0.9719, + "N Points in Bucket": 13 + }, + { + "SKU": "Genoa_9654", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x AMD EPYC 9654, 384 Threads, HT=On, Zen4 Genoa", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 185.3, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 2403, + "Mean Search Latency (ms)": 94.376, + "Mean Recall": 0.9954, + "N Points in Bucket": 21 + }, + { + "SKU": "Genoa_9654", + "Hardware Type": "CPU", + "GPU": "NA", + 
"CPU": "2x AMD EPYC 9654, 384 Threads, HT=On, Zen4 Genoa", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 114.22, + "Search Batch Size": 1, + "TopK": 1000, + "Mean Search Throughput (QPS)": 2436, + "Mean Search Latency (ms)": 176.568, + "Mean Recall": 0.9413, + "N Points in Bucket": 3 + }, + { + "SKU": "Genoa_9654", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x AMD EPYC 9654, 384 Threads, HT=On, Zen4 Genoa", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 224.1, + "Search Batch Size": 1, + "TopK": 1000, + "Mean Search Throughput (QPS)": 1572, + "Mean Search Latency (ms)": 203.731, + "Mean Recall": 0.9563, + "N Points in Bucket": 4 + }, + { + "SKU": "Genoa_9654", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x AMD EPYC 9654, 384 Threads, HT=On, Zen4 Genoa", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 76.06, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 35837, + "Mean Search Latency (ms)": 123.799, + "Mean Recall": 0.9209, + "N Points in Bucket": 5 + }, + { + "SKU": "Genoa_9654", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x AMD EPYC 9654, 384 Threads, HT=On, Zen4 Genoa", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 98.4, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 10703, + "Mean Search Latency (ms)": 481.908, + "Mean Recall": 0.9795, + "N Points in Bucket": 11 + }, + { + "SKU": "Genoa_9654", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x AMD EPYC 9654, 384 Threads, HT=On, 
Zen4 Genoa", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 195.09, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 2410, + "Mean Search Latency (ms)": 751.321, + "Mean Recall": 0.9963, + "N Points in Bucket": 16 + }, + { + "SKU": "Genoa_9654", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x AMD EPYC 9654, 384 Threads, HT=On, Zen4 Genoa", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 135.66, + "Search Batch Size": 10, + "TopK": 1000, + "Mean Search Throughput (QPS)": 2352, + "Mean Search Latency (ms)": 1284.847, + "Mean Recall": 0.9424, + "N Points in Bucket": 3 + }, + { + "SKU": "Genoa_9654", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x AMD EPYC 9654, 384 Threads, HT=On, Zen4 Genoa", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 223.32, + "Search Batch Size": 10, + "TopK": 1000, + "Mean Search Throughput (QPS)": 1233, + "Mean Search Latency (ms)": 1324.779, + "Mean Recall": 0.9561, + "N Points in Bucket": 6 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 10.86, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 38983, + "Mean Search Latency (ms)": 2.844, + "Mean Recall": 0.9131, + "N Points in Bucket": 1 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire 
Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 10.17, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 35843, + "Mean Search Latency (ms)": 7.846, + "Mean Recall": 0.9709, + "N Points in Bucket": 4 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 10.74, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 15645, + "Mean Search Latency (ms)": 12.862, + "Mean Recall": 0.9974, + "N Points in Bucket": 42 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 8.96, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 298758, + "Mean Search Latency (ms)": 13.914, + "Mean Recall": 0.9284, + "N Points in Bucket": 7 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 10.15, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 189566, + "Mean Search Latency (ms)": 13.806, + "Mean Recall": 0.9774, + "N Points in Bucket": 10 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 
224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 10.25, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 62458, + "Mean Search Latency (ms)": 114.422, + "Mean Recall": 0.9958, + "N Points in Bucket": 19 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 15.16, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 1144, + "Mean Search Latency (ms)": 0.902, + "Mean Recall": 0.9311, + "N Points in Bucket": 9 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 14.14, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 578, + "Mean Search Latency (ms)": 2.247, + "Mean Recall": 0.9747, + "N Points in Bucket": 19 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 14.57, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 188, + "Mean Search Latency (ms)": 259.522, + "Mean Recall": 0.9966, + "N Points in Bucket": 7 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": 
"H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 15.2, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 7895, + "Mean Search Latency (ms)": 1.332, + "Mean Recall": 0.9236, + "N Points in Bucket": 8 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 14.26, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 3856, + "Mean Search Latency (ms)": 2.898, + "Mean Recall": 0.9708, + "N Points in Bucket": 16 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 14.88, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 978, + "Mean Search Latency (ms)": 1493.781, + "Mean Recall": 0.997, + "N Points in Bucket": 31 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 64.97, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 1312, + "Mean Search Latency (ms)": 18.573, + "Mean Recall": 0.923, + "N Points in Bucket": 19 + }, + { + "SKU": 
"H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 76.69, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 711, + "Mean Search Latency (ms)": 158.355, + "Mean Recall": 0.9724, + "N Points in Bucket": 16 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 101.42, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 316, + "Mean Search Latency (ms)": 648.719, + "Mean Recall": 0.9955, + "N Points in Bucket": 5 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 64.92, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 1310, + "Mean Search Latency (ms)": 165.032, + "Mean Recall": 0.9273, + "N Points in Bucket": 12 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 70.42, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 657, + "Mean Search Latency (ms)": 484.856, + "Mean Recall": 0.9762, + "N Points 
in Bucket": 16 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 102.77, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 294, + "Mean Search Latency (ms)": 898.319, + "Mean Recall": 0.9958, + "N Points in Bucket": 6 + }, + { + "SKU": "L4", + "Hardware Type": "GPU", + "GPU": "L4", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 23.73, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 30133, + "Mean Search Latency (ms)": 2.684, + "Mean Recall": 0.9317, + "N Points in Bucket": 4 + }, + { + "SKU": "L4", + "Hardware Type": "GPU", + "GPU": "L4", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 25.09, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 19020, + "Mean Search Latency (ms)": 6.492, + "Mean Recall": 0.9788, + "N Points in Bucket": 18 + }, + { + "SKU": "L4", + "Hardware Type": "GPU", + "GPU": "L4", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 26.56, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 5911, + "Mean Search Latency (ms)": 8.01, + "Mean Recall": 0.9964, + "N Points in Bucket": 40 + }, + { + "SKU": "L4", + "Hardware 
Type": "GPU", + "GPU": "L4", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 21.82, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 74020, + "Mean Search Latency (ms)": 13.567, + "Mean Recall": 0.9272, + "N Points in Bucket": 2 + }, + { + "SKU": "L4", + "Hardware Type": "GPU", + "GPU": "L4", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 25.78, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 35277, + "Mean Search Latency (ms)": 21.027, + "Mean Recall": 0.984, + "N Points in Bucket": 11 + }, + { + "SKU": "L4", + "Hardware Type": "GPU", + "GPU": "L4", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 26.88, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 6923, + "Mean Search Latency (ms)": 139.686, + "Mean Recall": 0.9969, + "N Points in Bucket": 25 + }, + { + "SKU": "L4", + "Hardware Type": "GPU", + "GPU": "L4", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 44.67, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 1301, + "Mean Search Latency (ms)": 2.316, + "Mean Recall": 0.9309, + "N Points in Bucket": 4 + }, + { + "SKU": "L4", + "Hardware Type": "GPU", + "GPU": "L4", + "CPU": "2x AMD EPYC 7763, 128 
Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 36.47, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 629, + "Mean Search Latency (ms)": 7.378, + "Mean Recall": 0.9784, + "N Points in Bucket": 8 + }, + { + "SKU": "L4", + "Hardware Type": "GPU", + "GPU": "L4", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 34.82, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 280, + "Mean Search Latency (ms)": 318.926, + "Mean Recall": 0.9953, + "N Points in Bucket": 6 + }, + { + "SKU": "L4", + "Hardware Type": "GPU", + "GPU": "L4", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 38.16, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 5517, + "Mean Search Latency (ms)": 2.349, + "Mean Recall": 0.9253, + "N Points in Bucket": 7 + }, + { + "SKU": "L4", + "Hardware Type": "GPU", + "GPU": "L4", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 34.19, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 2523, + "Mean Search Latency (ms)": 269.387, + "Mean Recall": 0.9768, + "N Points in Bucket": 16 + }, + { + "SKU": "L4", + "Hardware Type": "GPU", + "GPU": "L4", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": 
"FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 37.04, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 923, + "Mean Search Latency (ms)": 1632.779, + "Mean Recall": 0.9971, + "N Points in Bucket": 23 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 10.05, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 38249, + "Mean Search Latency (ms)": 6.546, + "Mean Recall": 0.9804, + "N Points in Bucket": 9 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 10.95, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 11870, + "Mean Search Latency (ms)": 25.83, + "Mean Recall": 0.9971, + "N Points in Bucket": 44 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 9.43, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 178441, + "Mean Search Latency (ms)": 18.652, + "Mean Recall": 0.9274, + "N Points in Bucket": 4 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": 
"throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 10.34, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 72111, + "Mean Search Latency (ms)": 22.845, + "Mean Recall": 0.9798, + "N Points in Bucket": 16 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 11.39, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 22046, + "Mean Search Latency (ms)": 753.079, + "Mean Recall": 0.9978, + "N Points in Bucket": 16 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 13.67, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 1853, + "Mean Search Latency (ms)": 1.218, + "Mean Recall": 0.9268, + "N Points in Bucket": 3 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 13.63, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 671, + "Mean Search Latency (ms)": 2.822, + "Mean Recall": 0.973, + "N Points in Bucket": 17 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": 
">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 13.7, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 226, + "Mean Search Latency (ms)": 412.092, + "Mean Recall": 0.9974, + "N Points in Bucket": 8 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 13.45, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 8547, + "Mean Search Latency (ms)": 1.346, + "Mean Recall": 0.9306, + "N Points in Bucket": 8 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 14.3, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 3444, + "Mean Search Latency (ms)": 3.507, + "Mean Recall": 0.9791, + "N Points in Bucket": 19 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 15.92, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 1139, + "Mean Search Latency (ms)": 1455.341, + "Mean Recall": 0.9972, + "N Points in Bucket": 18 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 
1000000, + "Dimensions": 1024, + "Index Build Time (s)": 114.07, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 1464, + "Mean Search Latency (ms)": 25.114, + "Mean Recall": 0.9267, + "N Points in Bucket": 12 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 179.91, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 759, + "Mean Search Latency (ms)": 136.958, + "Mean Recall": 0.9756, + "N Points in Bucket": 17 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 219.95, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 352, + "Mean Search Latency (ms)": 462.686, + "Mean Recall": 0.9947, + "N Points in Bucket": 10 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 157.68, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 1496, + "Mean Search Latency (ms)": 45.385, + "Mean Recall": 0.9265, + "N Points in Bucket": 13 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build 
Time (s)": 154.35, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 770, + "Mean Search Latency (ms)": 454.886, + "Mean Recall": 0.9757, + "N Points in Bucket": 13 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 258.2, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 323, + "Mean Search Latency (ms)": 1069.649, + "Mean Recall": 0.9956, + "N Points in Bucket": 6 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 13.28, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 34776, + "Mean Search Latency (ms)": 6.423, + "Mean Recall": 0.9855, + "N Points in Bucket": 8 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 13.96, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 11716, + "Mean Search Latency (ms)": 12.764, + "Mean Recall": 0.9972, + "N Points in Bucket": 37 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + 
"Dimensions": 1024, + "Index Build Time (s)": 12.88, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 176395, + "Mean Search Latency (ms)": 10.523, + "Mean Recall": 0.93, + "N Points in Bucket": 5 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 13.88, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 84601, + "Mean Search Latency (ms)": 15.826, + "Mean Recall": 0.9747, + "N Points in Bucket": 15 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 14.46, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 26068, + "Mean Search Latency (ms)": 897.21, + "Mean Recall": 0.9977, + "N Points in Bucket": 16 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 16.97, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 1277, + "Mean Search Latency (ms)": 0.941, + "Mean Recall": 0.9311, + "N Points in Bucket": 5 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + 
"Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 16.43, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 567, + "Mean Search Latency (ms)": 2.094, + "Mean Recall": 0.9757, + "N Points in Bucket": 18 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 17.75, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 211, + "Mean Search Latency (ms)": 277.42, + "Mean Recall": 0.9957, + "N Points in Bucket": 8 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 16.47, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 8630, + "Mean Search Latency (ms)": 1.192, + "Mean Recall": 0.9216, + "N Points in Bucket": 6 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 17.1, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 4009, + "Mean Search Latency (ms)": 2.832, + "Mean Recall": 0.9721, + "N Points in Bucket": 19 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 
Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 18.16, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 1114, + "Mean Search Latency (ms)": 562.567, + "Mean Recall": 0.9966, + "N Points in Bucket": 23 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 159.31, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 1502, + "Mean Search Latency (ms)": 23.332, + "Mean Recall": 0.9233, + "N Points in Bucket": 14 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 205.73, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 758, + "Mean Search Latency (ms)": 91.43, + "Mean Recall": 0.9767, + "N Points in Bucket": 11 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 302.47, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 323, + "Mean Search Latency (ms)": 566.174, + "Mean Recall": 0.9972, + "N Points in Bucket": 6 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": 
"GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 151.36, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 1537, + "Mean Search Latency (ms)": 41.827, + "Mean Recall": 0.923, + "N Points in Bucket": 11 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 196.79, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 832, + "Mean Search Latency (ms)": 432.442, + "Mean Recall": 0.9729, + "N Points in Bucket": 15 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 235.94, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 347, + "Mean Search Latency (ms)": 756.032, + "Mean Recall": 0.9942, + "N Points in Bucket": 9 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 7.35, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 40788, + "Mean Search Latency (ms)": 7.513, + "Mean Recall": 0.9323, + "N Points in 
Bucket": 1 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 7.35, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 39554, + "Mean Search Latency (ms)": 5.816, + "Mean Recall": 0.9823, + "N Points in Bucket": 3 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 8.6, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 15196, + "Mean Search Latency (ms)": 13.229, + "Mean Recall": 0.9976, + "N Points in Bucket": 32 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 9.15, + "Search Batch Size": 1, + "TopK": 1000, + "Mean Search Throughput (QPS)": 19355, + "Mean Search Latency (ms)": 5.054, + "Mean Recall": 0.906, + "N Points in Bucket": 7 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 8.79, + "Search Batch Size": 1, + "TopK": 1000, + "Mean Search Throughput (QPS)": 9418, + "Mean Search 
Latency (ms)": 24.15, + "Mean Recall": 0.9823, + "N Points in Bucket": 10 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 9.32, + "Search Batch Size": 1, + "TopK": 1000, + "Mean Search Throughput (QPS)": 3980, + "Mean Search Latency (ms)": 17.687, + "Mean Recall": 0.9936, + "N Points in Bucket": 6 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 8.04, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 313550, + "Mean Search Latency (ms)": 14.533, + "Mean Recall": 0.9291, + "N Points in Bucket": 4 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 8.28, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 129726, + "Mean Search Latency (ms)": 14.584, + "Mean Recall": 0.9807, + "N Points in Bucket": 18 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 8.57, + "Search Batch Size": 10, + 
"TopK": 10, + "Mean Search Throughput (QPS)": 52490, + "Mean Search Latency (ms)": 127.814, + "Mean Recall": 0.9949, + "N Points in Bucket": 12 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 10.49, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 1958, + "Mean Search Latency (ms)": 1.035, + "Mean Recall": 0.9321, + "N Points in Bucket": 1 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 11.56, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 936, + "Mean Search Latency (ms)": 2.506, + "Mean Recall": 0.9696, + "N Points in Bucket": 8 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 11.78, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 226, + "Mean Search Latency (ms)": 153.373, + "Mean Recall": 0.997, + "N Points in Bucket": 9 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 
1000000, + "Dimensions": 1024, + "Index Build Time (s)": 11.84, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 8911, + "Mean Search Latency (ms)": 1.152, + "Mean Recall": 0.9205, + "N Points in Bucket": 5 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 12.82, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 3508, + "Mean Search Latency (ms)": 3.423, + "Mean Recall": 0.9774, + "N Points in Bucket": 17 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 12.9, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 1178, + "Mean Search Latency (ms)": 1561.954, + "Mean Recall": 0.9971, + "N Points in Bucket": 28 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 90.79, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 1496, + "Mean Search Latency (ms)": 17.152, + "Mean Recall": 0.9239, + "N Points in Bucket": 10 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": 
"cuvs_vamana", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 102.79, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 793, + "Mean Search Latency (ms)": 109.882, + "Mean Recall": 0.9752, + "N Points in Bucket": 15 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 101.19, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 359, + "Mean Search Latency (ms)": 269.405, + "Mean Recall": 0.9938, + "N Points in Bucket": 9 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 96.62, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 1554, + "Mean Search Latency (ms)": 103.732, + "Mean Recall": 0.9244, + "N Points in Bucket": 9 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 84.54, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 841, + "Mean Search Latency (ms)": 254.994, + "Mean Recall": 0.9728, + "N Points in Bucket": 13 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 
224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 119.69, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 343, + "Mean Search Latency (ms)": 782.166, + "Mean Recall": 0.995, + "N Points in Bucket": 8 + } +] \ No newline at end of file diff --git a/fern/build_docs.sh b/fern/build_docs.sh index 3dd001814b..17351fff22 100755 --- a/fern/build_docs.sh +++ b/fern/build_docs.sh @@ -68,6 +68,10 @@ generate_api_reference() { python3 "${SCRIPT_DIR}/scripts/generate_api_reference.py" } +export_performance_dashboard_data() { + python3 "${SCRIPT_DIR}/scripts/export_performance_dashboard_data.py" +} + run_checks() { pushd "${REPO_DIR}" >/dev/null run_fern check --warnings @@ -78,10 +82,12 @@ run_checks() { case "${MODE}" in check) generate_api_reference + export_performance_dashboard_data run_checks ;; preview) generate_api_reference + export_performance_dashboard_data run_checks pushd "${REPO_DIR}" >/dev/null run_fern generate --docs --preview "$@" @@ -89,6 +95,7 @@ case "${MODE}" in ;; publish) generate_api_reference + export_performance_dashboard_data run_checks pushd "${REPO_DIR}" >/dev/null run_fern generate --docs "$@" @@ -96,6 +103,7 @@ case "${MODE}" in ;; dev) generate_api_reference + export_performance_dashboard_data pushd "${REPO_DIR}" >/dev/null run_fern docs dev "$@" popd >/dev/null diff --git a/fern/docs.yml b/fern/docs.yml index 7e42d246ab..6f25ea3fbb 100644 --- a/fern/docs.yml +++ b/fern/docs.yml @@ -47,6 +47,7 @@ css: - "./styles/metrics-table.css" - "./styles/notebook-viewer.css" - "./styles/trajectory-viewer.css" + - "./styles/performance-dashboard.css" experimental: mdx-components: - "./theme/nvidia/components" @@ -96,6 +97,8 @@ navigation: path: "./pages/tuning_guide.md" - page: "Integrations" path: "./pages/integrations.md" + - page: "Performance" + path: 
"./pages/performance.md" - page: "Use-cases" path: "./pages/use_cases.md" - page: "Using cuVS APIs" diff --git a/fern/pages/performance.md b/fern/pages/performance.md new file mode 100644 index 0000000000..6b695d1284 --- /dev/null +++ b/fern/pages/performance.md @@ -0,0 +1,13 @@ +import PerformanceDashboard from "@/theme/nvidia/components/PerformanceDashboard"; + +# Performance + +Compare cuVS-Bench performance across hardware and algorithms. Adjust the filters to see comparative performance for index build time, search throughput (QPS), and search latency. + +<PerformanceDashboard /> + +## About this data + +These results come from [cuVS-Bench](cuvs_bench/index.md) runs on cuVS 26.04. The source dataset is [MIRACL](https://huggingface.co/datasets/miracl/miracl-corpus), embedded with [Llama Nemotron Embed 1B](https://huggingface.co/nvidia/llama-nemotron-embed-1b-v2). Each row represents a tuned configuration bucketed by recall range (`[90-95%)`, `[95-99%)`, `>=99%`). Green bars denote GPU SKUs; blue bars denote CPU SKUs. + +To reproduce or extend these benchmarks, see the [cuVS-Bench guide](cuvs_bench/index.md). diff --git a/fern/scripts/export_performance_dashboard_data.py b/fern/scripts/export_performance_dashboard_data.py new file mode 100644 index 0000000000..ae8a1b48af --- /dev/null +++ b/fern/scripts/export_performance_dashboard_data.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python3 +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. 
+# SPDX-License-Identifier: Apache-2.0 + +"""Export cuVS bench CSV results to JSON and TypeScript for the Performance dashboard.""" + +from __future__ import annotations + +import csv +import json +from pathlib import Path + +REPO_DIR = Path(__file__).resolve().parents[2] +DEFAULT_CSV = REPO_DIR / "data" / "benchmarks" / "results_cuvs_26_04.csv" +OUTPUT_JSON = REPO_DIR / "fern" / "assets" / "data" / "benchmark_results.json" +BENCHMARK_DATA_TS = ( + REPO_DIR / "fern" / "theme" / "nvidia" / "components" / "benchmarkData.ts" +) + +NUMERIC_COLUMNS = { + "Index Build Time (s)", + "Search Batch Size", + "TopK", + "Mean Search Throughput (QPS)", + "Mean Search Latency (ms)", + "Mean Recall", + "N Points in Bucket", + "Total Vectors", + "Dimensions", +} + + +def parse_row(row: dict[str, str]) -> dict: + parsed: dict = {} + for key, value in row.items(): + if key in NUMERIC_COLUMNS and value not in ("", "NA", None): + try: + parsed[key] = float(value) if "." in value else int(value) + except ValueError: + parsed[key] = value + else: + parsed[key] = value + return parsed + + +def write_benchmark_data_ts(rows: list[dict], output_path: Path) -> None: + output_path.write_text( + """/** + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +export type BenchmarkRow = Record; + +export const BENCHMARK_ROWS: BenchmarkRow[] = """ + + json.dumps(rows, indent=2) + + ";\n", + encoding="utf-8", + ) + + +def export(csv_path: Path = DEFAULT_CSV, output_path: Path = OUTPUT_JSON) -> int: + if not csv_path.is_file(): + raise FileNotFoundError(f"Benchmark CSV not found: {csv_path}") + + with csv_path.open(newline="", encoding="utf-8") as handle: + rows = [parse_row(row) for row in csv.DictReader(handle, delimiter="\t")] + + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text(json.dumps(rows, indent=2), encoding="utf-8") + print(f"Wrote {len(rows)} rows to {output_path}") + + write_benchmark_data_ts(rows, BENCHMARK_DATA_TS) + print(f"Wrote {len(rows)} rows to {BENCHMARK_DATA_TS}") + + return len(rows) + + +def main() -> int: + export() + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/fern/styles/performance-dashboard.css b/fern/styles/performance-dashboard.css new file mode 100644 index 0000000000..17110e64b0 --- /dev/null +++ b/fern/styles/performance-dashboard.css @@ -0,0 +1,307 @@ +.performance-dashboard { + --pd-nv-green: #76b900; + --pd-intel-blue: #0071c5; + --pd-border: #e0e0e0; + --pd-muted: #666; + margin: 0 0 24px; + font-size: 13px; + color: #222; +} + +.performance-dashboard .pd-subtitle { + color: var(--pd-muted); + margin-bottom: 16px; + font-size: 13px; +} + +.performance-dashboard .pd-status { + font-size: 12px; + color: var(--pd-muted); + padding: 4px 0; +} + +.performance-dashboard .pd-status.err { + color: #c00; +} + +.performance-dashboard .pd-filters { + background: #fff; + border: 1px solid var(--pd-border); + border-radius: 8px; + padding: 12px 16px; + margin-bottom: 16px; + display: grid; + grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); + gap: 14px; +} + +.performance-dashboard .pd-filter-group label.pd-title { + display: block; + font-weight: 600; + font-size: 11px; + 
color: var(--pd-muted); + margin-bottom: 6px; + text-transform: uppercase; + letter-spacing: 0.05em; +} + +.performance-dashboard .pd-ms-dropdown { + position: relative; +} + +.performance-dashboard .pd-ms-btn { + width: 100%; + display: flex; + align-items: center; + justify-content: space-between; + gap: 6px; + padding: 7px 10px; + background: #fff; + border: 1px solid var(--pd-border); + border-radius: 6px; + font-size: 13px; + cursor: pointer; + text-align: left; + min-height: 34px; +} + +.performance-dashboard .pd-ms-btn.open { + border-color: #333; + box-shadow: 0 0 0 2px rgba(0, 0, 0, 0.06); +} + +.performance-dashboard .pd-ms-summary { + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + flex: 1; +} + +.performance-dashboard .pd-ms-count-pill { + display: inline-block; + background: #eee; + color: #333; + padding: 1px 7px; + border-radius: 10px; + font-size: 10px; + font-weight: 600; + margin-left: 6px; +} + +.performance-dashboard .pd-ms-panel { + position: absolute; + top: calc(100% + 4px); + left: 0; + right: 0; + background: #fff; + border: 1px solid var(--pd-border); + border-radius: 6px; + box-shadow: 0 6px 18px rgba(0, 0, 0, 0.12); + padding: 6px; + z-index: 30; + max-height: 280px; + overflow-y: auto; +} + +.performance-dashboard .pd-ms-actions { + display: flex; + gap: 6px; + border-bottom: 1px solid #eee; + padding: 4px 6px 6px; + margin-bottom: 4px; +} + +.performance-dashboard .pd-ms-actions button { + flex: 1; + background: #f5f5f5; + border: 1px solid var(--pd-border); + border-radius: 4px; + padding: 4px 6px; + font-size: 11px; + cursor: pointer; +} + +.performance-dashboard .pd-ms-option { + display: flex; + align-items: center; + gap: 8px; + padding: 5px 8px; + border-radius: 4px; + cursor: pointer; + font-size: 13px; +} + +.performance-dashboard .pd-ms-option:hover { + background: #f5f5f5; +} + +.performance-dashboard .pd-swatch { + display: inline-block; + width: 9px; + height: 9px; + border-radius: 50%; + flex-shrink: 
0; +} + +.performance-dashboard .pd-swatch.gpu { + background: var(--pd-nv-green); +} + +.performance-dashboard .pd-swatch.cpu { + background: var(--pd-intel-blue); +} + +.performance-dashboard .pd-chart-card { + background: #fff; + border: 1px solid var(--pd-border); + border-radius: 8px; + padding: 14px; + margin-bottom: 16px; +} + +.performance-dashboard .pd-chart-card h3 { + margin: 0; + font-size: 15px; + font-weight: 600; +} + +.performance-dashboard .pd-subhead { + font-size: 11px; + color: var(--pd-muted); + margin-bottom: 10px; +} + +.performance-dashboard .pd-card-head { + display: flex; + align-items: flex-start; + justify-content: space-between; + gap: 10px; + margin-bottom: 4px; +} + +.performance-dashboard .pd-sort-toggle { + display: inline-flex; + border: 1px solid var(--pd-border); + border-radius: 6px; + overflow: hidden; + background: #fff; + flex-shrink: 0; +} + +.performance-dashboard .pd-sort-toggle button { + background: #fff; + border: none; + border-left: 1px solid var(--pd-border); + font-size: 11px; + padding: 5px 10px; + color: var(--pd-muted); + cursor: pointer; +} + +.performance-dashboard .pd-sort-toggle button:first-child { + border-left: none; +} + +.performance-dashboard .pd-sort-toggle button.active { + background: #333; + color: #fff; +} + +.performance-dashboard .pd-sort-label { + font-size: 10px; + color: var(--pd-muted); + align-self: center; + text-transform: uppercase; + letter-spacing: 0.05em; + font-weight: 600; + margin-right: 4px; +} + +.performance-dashboard .pd-chart-wrap { + position: relative; + height: 380px; +} + +.performance-dashboard .pd-series-legend { + display: flex; + flex-wrap: wrap; + gap: 8px 14px; + margin-top: 10px; + padding-top: 10px; + border-top: 1px solid #f0f0f0; +} + +.performance-dashboard .pd-series-legend .item { + display: inline-flex; + align-items: center; + gap: 6px; + font-size: 11px; +} + +.performance-dashboard .pd-series-legend .swatch-sq { + display: inline-block; + width: 11px; + 
height: 11px; + border-radius: 2px; +} + +.performance-dashboard .pd-table-panel { + background: #fff; + border: 1px solid var(--pd-border); + border-radius: 8px; + padding: 14px; +} + +.performance-dashboard .pd-table-panel h3 { + margin: 0 0 10px; + font-size: 14px; +} + +.performance-dashboard table { + width: 100%; + border-collapse: collapse; + font-size: 12px; +} + +.performance-dashboard th, +.performance-dashboard td { + padding: 6px 10px; + text-align: left; + border-bottom: 1px solid #f0f0f0; +} + +.performance-dashboard th { + background: #f7f7f7; + color: #444; + font-weight: 600; + position: sticky; + top: 0; + font-size: 11px; + text-transform: uppercase; +} + +.performance-dashboard tbody tr:hover { + background: #fafafa; +} + +.performance-dashboard .pd-hw-dot { + display: inline-block; + width: 8px; + height: 8px; + border-radius: 50%; + margin-right: 6px; + vertical-align: middle; +} + +.performance-dashboard .pd-hw-dot.gpu { + background: var(--pd-nv-green); +} + +.performance-dashboard .pd-hw-dot.cpu { + background: var(--pd-intel-blue); +} + +.performance-dashboard .pd-table-scroll { + max-height: 400px; + overflow: auto; +} diff --git a/fern/theme/nvidia/components/PerformanceDashboard.tsx b/fern/theme/nvidia/components/PerformanceDashboard.tsx new file mode 100644 index 0000000000..0ec47d4785 --- /dev/null +++ b/fern/theme/nvidia/components/PerformanceDashboard.tsx @@ -0,0 +1,482 @@ +/** + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +import { useEffect, useMemo, useRef, useState } from "react"; +import { BENCHMARK_ROWS } from "./benchmarkData"; +import { + applyFilters, + BUCKET_LABEL, + buildChartData, + buildFilterOptions, + ChartDataResult, + defaultFilters, + describeFilterSelection, + FILTER_KEYS, + FilterKey, + fmtBar, + labelFor, + labelPlural, + SeriesLegendItem, + sortTableRows, + SortMode, + TABLE_COLS, + type BenchmarkRow, +} from "./performanceDashboardLogic"; + +declare global { + interface Window { + Chart?: any; + ChartDataLabels?: { default: unknown }; + } +} + +const CHART_JS_URL = + "https://cdn.jsdelivr.net/npm/chart.js@4.4.1/dist/chart.umd.min.js"; +const CHART_DATALABELS_URL = + "https://cdn.jsdelivr.net/npm/chartjs-plugin-datalabels@2.2.0/dist/chartjs-plugin-datalabels.min.js"; + +const FILTER_LABELS: Record = { + algo: "Algorithm", + gpu: "GPU SKU", + cpu: "CPU SKU", + bs: "Search Batch Size", + topk: "TopK", + totalVectors: "Total Vectors", + dims: "Dimensions", + dtype: "dtype", + mode: "Mode", +}; + +const FILTER_COLOR_MODE: Partial> = { + gpu: "gpu", + cpu: "cpu", +}; + +const CHARTS: { + id: string; + title: string; + metric: string; + yLabel: string; +}[] = [ + { + id: "chart-build", + title: "Index Build Time (s)", + metric: "Index Build Time (s)", + yLabel: "Build time (s)", + }, + { + id: "chart-qps", + title: "Search Throughput (QPS)", + metric: "Mean Search Throughput (QPS)", + yLabel: "QPS", + }, + { + id: "chart-lat", + title: "Search Latency (ms)", + metric: "Mean Search Latency (ms)", + yLabel: "Latency (ms)", + }, +]; + +let chartLoaderPromise: Promise | null = null; + +function loadChartJs(): Promise { + if (typeof window === "undefined") return Promise.resolve(); + if (window.Chart && window.ChartDataLabels) return Promise.resolve(); + if (chartLoaderPromise) return chartLoaderPromise; + + chartLoaderPromise = new Promise((resolve, reject) => { + const script = document.createElement("script"); + script.src 
= CHART_JS_URL; + script.async = true; + script.onload = () => { + const plugin = document.createElement("script"); + plugin.src = CHART_DATALABELS_URL; + plugin.async = true; + plugin.onload = () => { + if (window.Chart && window.ChartDataLabels) { + const pluginModule = + (window.ChartDataLabels as { default?: unknown }).default ?? + window.ChartDataLabels; + window.Chart.register(pluginModule); + } + resolve(); + }; + plugin.onerror = () => reject(new Error("Failed to load Chart.js datalabels plugin")); + document.head.appendChild(plugin); + }; + script.onerror = () => reject(new Error("Failed to load Chart.js")); + document.head.appendChild(script); + }); + + return chartLoaderPromise; +} + +function toggleValue(values: T[], value: T): T[] { + const index = values.indexOf(value); + if (index === -1) return [...values, value]; + return values.filter((_, i) => i !== index); +} + +function MultiSelectFilter({ + filterKey, + options, + selected, + onChange, +}: { + filterKey: FilterKey; + options: (string | number)[]; + selected: (string | number)[]; + onChange: (values: (string | number)[]) => void; +}) { + const [open, setOpen] = useState(false); + const rootRef = useRef(null); + const colorMode = FILTER_COLOR_MODE[filterKey]; + + useEffect(() => { + const onDocumentClick = (event: MouseEvent) => { + if (rootRef.current && !rootRef.current.contains(event.target as Node)) { + setOpen(false); + } + }; + document.addEventListener("click", onDocumentClick); + return () => document.removeEventListener("click", onDocumentClick); + }, []); + + const summary = + selected.length === 0 + ? `No ${labelPlural(filterKey)}` + : selected.length === options.length + ? `All ${labelPlural(filterKey)}` + : selected.map((value) => labelFor(filterKey, value)).join(", "); + + return ( +
+ +
+ + {open ? ( +
+
+ + +
+ {options.map((value) => ( + + ))} +
+ ) : null} +
+
+ ); +} + +function SeriesLegend({ chartData }: { chartData: ChartDataResult | null }) { + if (!chartData?.allSeries.length) { + return No series to show — adjust filters.; + } + + return ( + <> + {chartData.sorted && chartData.anchorBucket ? ( + + Legend ordered by {BUCKET_LABEL[chartData.anchorBucket]} bucket: + + ) : null} + {chartData.allSeries.map((item: SeriesLegendItem) => ( + + + {item.label} + + ))} + + ); +} + +function ChartPanel({ + chartId, + title, + metric, + yLabel, + subhead, + rows, + sortMode, + onSortModeChange, + chartsReady, +}: { + chartId: string; + title: string; + metric: string; + yLabel: string; + subhead: string; + rows: BenchmarkRow[]; + sortMode: SortMode; + onSortModeChange: (mode: SortMode) => void; + chartsReady: boolean; +}) { + const canvasRef = useRef(null); + const chartRef = useRef(null); + const chartData = useMemo( + () => (rows.length ? buildChartData(rows, metric, sortMode) : null), + [rows, metric, sortMode], + ); + + useEffect(() => { + if (!chartsReady || !canvasRef.current || !chartData || !window.Chart) { + return; + } + + const totalBars = chartData.datasets.reduce( + (count, dataset) => + count + dataset.data.filter((value) => value != null).length, + 0, + ); + const showLabels = totalBars > 0 && totalBars <= 30; + + chartRef.current?.destroy(); + chartRef.current = new window.Chart(canvasRef.current, { + type: "bar", + data: chartData, + options: { + responsive: true, + maintainAspectRatio: false, + layout: { padding: { top: 18 } }, + plugins: { + legend: { display: false }, + tooltip: { + callbacks: { + label(context: any) { + const value = context.parsed.y; + const dataset = context.dataset as ChartDataResult["datasets"][number]; + const meta = dataset._seriesMeta?.[context.dataIndex]; + const label = meta?.label ?? dataset.label; + if (value == null) return `${label}: (no data)`; + return `${label}: ${value >= 1000 ? 
value.toLocaleString() : value.toFixed(2)}`; + }, + }, + }, + datalabels: { + display: showLabels, + anchor: "end", + align: "end", + clamp: true, + offset: 2, + color: "#333", + font: { size: 9, weight: "600" }, + formatter: (value: number | null) => fmtBar(value), + }, + }, + scales: { + x: { + title: { display: true, text: "Recall" }, + grid: { display: false }, + }, + y: { + title: { display: true, text: yLabel }, + beginAtZero: true, + }, + }, + }, + }); + + return () => { + chartRef.current?.destroy(); + chartRef.current = null; + }; + }, [chartData, chartsReady, yLabel]); + + return ( +
+
+

{title}

+
+ Sort in-bucket: + {(["default", "desc", "asc"] as SortMode[]).map((mode) => ( + + ))} +
+
+
{subhead}
+
+ +
+
+ +
+
+ ); +} + +function ResultsTable({ rows }: { rows: BenchmarkRow[] }) { + const sortedRows = useMemo(() => sortTableRows(rows), [rows]); + + return ( +
+

Raw data ({sortedRows.length})

+
+ + + + {TABLE_COLS.map((column) => ( + + ))} + + + + {sortedRows.map((row, index) => ( + + {TABLE_COLS.map((column) => { + const value = row[column]; + if (column === "Hardware Type") { + const cls = value === "GPU" ? "gpu" : "cpu"; + return ( + + ); + } + if (column === "Mean Recall" && typeof value === "number") { + return ; + } + if (typeof value === "number" && value >= 1000) { + return ; + } + if (typeof value === "number") { + return ( + + ); + } + return ; + })} + + ))} + +
{column}
+ + {value} + {`${(value * 100).toFixed(1)}%`}{value.toLocaleString()}{Math.round(value * 1000) / 1000}{value ?? ""}
+
+
+ ); +} + +export default function PerformanceDashboard() { + const [mounted, setMounted] = useState(false); + const allRows = BENCHMARK_ROWS; + const options = useMemo(() => buildFilterOptions(allRows), [allRows]); + const [filters, setFilters] = useState>(() => + defaultFilters(options), + ); + const [sortModes, setSortModes] = useState>({ + "chart-build": "default", + "chart-qps": "default", + "chart-lat": "default", + }); + const [chartsReady, setChartsReady] = useState(false); + const [error, setError] = useState(null); + + useEffect(() => { + setMounted(true); + loadChartJs() + .then(() => setChartsReady(true)) + .catch((err: Error) => setError(err.message)); + }, []); + + const filteredRows = useMemo( + () => applyFilters(allRows, filters), + [allRows, filters], + ); + + const filterSummary = useMemo( + () => `${describeFilterSelection(filters, options)} · ${filteredRows.length} rows`, + [filters, filteredRows.length, options], + ); + + if (!mounted) { + return ( +
+
Loading dashboard…
+
+ ); + } + + if (error) { + return ( +
+
Error: {error}
+
+ ); + } + + return ( +
+
+ {FILTER_KEYS.map((filterKey) => ( + + setFilters((current) => ({ ...current, [filterKey]: values })) + } + /> + ))} +
+ + {CHARTS.map((chart) => ( + + setSortModes((current) => ({ ...current, [chart.id]: mode })) + } + chartsReady={chartsReady} + /> + ))} + + +
+ ); +} diff --git a/fern/theme/nvidia/components/benchmarkData.ts b/fern/theme/nvidia/components/benchmarkData.ts new file mode 100644 index 0000000000..e039b46cbd --- /dev/null +++ b/fern/theme/nvidia/components/benchmarkData.ts @@ -0,0 +1,2460 @@ +/** + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +export type BenchmarkRow = Record; + +export const BENCHMARK_ROWS: BenchmarkRow[] = [ + { + "SKU": "8480_plus", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x Intel 8480+, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 107, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 41744, + "Mean Search Latency (ms)": 7.184, + "Mean Recall": 0.93, + "N Points in Bucket": 7 + }, + { + "SKU": "8480_plus", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x Intel 8480+, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 110, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 20724, + "Mean Search Latency (ms)": 15.658, + "Mean Recall": 0.9694, + "N Points in Bucket": 8 + }, + { + "SKU": "8480_plus", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x Intel 8480+, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 169, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 3507, + "Mean Search Latency (ms)": 51.33, + "Mean Recall": 0.9954, + "N Points in Bucket": 15 + }, + { + "SKU": "8480_plus", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x Intel 
8480+, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 107, + "Search Batch Size": 1, + "TopK": 1000, + "Mean Search Throughput (QPS)": 2890, + "Mean Search Latency (ms)": 39.745, + "Mean Recall": 0.9226, + "N Points in Bucket": 7 + }, + { + "SKU": "8480_plus", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x Intel 8480+, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 195, + "Search Batch Size": 1, + "TopK": 1000, + "Mean Search Throughput (QPS)": 1366, + "Mean Search Latency (ms)": 50.766, + "Mean Recall": 0.9569, + "N Points in Bucket": 5 + }, + { + "SKU": "8480_plus", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x Intel 8480+, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 106, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 44388, + "Mean Search Latency (ms)": 66.702, + "Mean Recall": 0.9278, + "N Points in Bucket": 7 + }, + { + "SKU": "8480_plus", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x Intel 8480+, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 99.87, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 17214, + "Mean Search Latency (ms)": 198.402, + "Mean Recall": 0.9761, + "N Points in Bucket": 10 + }, + { + "SKU": "8480_plus", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x Intel 8480+, 224 Threads, HT=On, Sapphire Rapids", + 
"dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 169.81, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 3360, + "Mean Search Latency (ms)": 395.769, + "Mean Recall": 0.9955, + "N Points in Bucket": 14 + }, + { + "SKU": "8480_plus", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x Intel 8480+, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 99.2, + "Search Batch Size": 10, + "TopK": 1000, + "Mean Search Throughput (QPS)": 2720, + "Mean Search Latency (ms)": 401.52, + "Mean Recall": 0.9283, + "N Points in Bucket": 9 + }, + { + "SKU": "8480_plus", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x Intel 8480+, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 194.66, + "Search Batch Size": 10, + "TopK": 1000, + "Mean Search Throughput (QPS)": 1543, + "Mean Search Latency (ms)": 498.517, + "Mean Recall": 0.9572, + "N Points in Bucket": 3 + }, + { + "SKU": "A100-SXM4-80GB", + "Hardware Type": "GPU", + "GPU": "A100-SXM4-80GB", + "CPU": "2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 14.11, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 34073, + "Mean Search Latency (ms)": 7.821, + "Mean Recall": 0.9289, + "N Points in Bucket": 4 + }, + { + "SKU": "A100-SXM4-80GB", + "Hardware Type": "GPU", + "GPU": "A100-SXM4-80GB", + "CPU": "2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome", + "dtype": 
"FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 14.63, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 29745, + "Mean Search Latency (ms)": 9.03, + "Mean Recall": 0.9806, + "N Points in Bucket": 10 + }, + { + "SKU": "A100-SXM4-80GB", + "Hardware Type": "GPU", + "GPU": "A100-SXM4-80GB", + "CPU": "2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 16.66, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 11589, + "Mean Search Latency (ms)": 10.219, + "Mean Recall": 0.997, + "N Points in Bucket": 43 + }, + { + "SKU": "A100-SXM4-80GB", + "Hardware Type": "GPU", + "GPU": "A100-SXM4-80GB", + "CPU": "2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 16.43, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 189221, + "Mean Search Latency (ms)": 9.328, + "Mean Recall": 0.9291, + "N Points in Bucket": 2 + }, + { + "SKU": "A100-SXM4-80GB", + "Hardware Type": "GPU", + "GPU": "A100-SXM4-80GB", + "CPU": "2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 16.93, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 98065, + "Mean Search Latency (ms)": 9.474, + "Mean Recall": 0.9805, + "N Points in Bucket": 20 + }, + { + "SKU": "A100-SXM4-80GB", + "Hardware Type": "GPU", + "GPU": "A100-SXM4-80GB", + "CPU": "2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 
Rome", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 16.94, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 39666, + "Mean Search Latency (ms)": 102.733, + "Mean Recall": 0.997, + "N Points in Bucket": 21 + }, + { + "SKU": "A100-SXM4-80GB", + "Hardware Type": "GPU", + "GPU": "A100-SXM4-80GB", + "CPU": "2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 22.97, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 1418, + "Mean Search Latency (ms)": 5.262, + "Mean Recall": 0.9129, + "N Points in Bucket": 7 + }, + { + "SKU": "A100-SXM4-80GB", + "Hardware Type": "GPU", + "GPU": "A100-SXM4-80GB", + "CPU": "2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 24.9, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 624, + "Mean Search Latency (ms)": 7.651, + "Mean Recall": 0.9776, + "N Points in Bucket": 9 + }, + { + "SKU": "A100-SXM4-80GB", + "Hardware Type": "GPU", + "GPU": "A100-SXM4-80GB", + "CPU": "2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 23.83, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 269, + "Mean Search Latency (ms)": 973.856, + "Mean Recall": 0.9981, + "N Points in Bucket": 4 + }, + { + "SKU": "A100-SXM4-80GB", + "Hardware Type": "GPU", + "GPU": "A100-SXM4-80GB", + "CPU": "2x AMD EPYC 
7742, 256 Threads, HT=On, Zen2 Rome", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 39.24, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 4324, + "Mean Search Latency (ms)": 2.818, + "Mean Recall": 0.9283, + "N Points in Bucket": 9 + }, + { + "SKU": "A100-SXM4-80GB", + "Hardware Type": "GPU", + "GPU": "A100-SXM4-80GB", + "CPU": "2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 26.28, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 2334, + "Mean Search Latency (ms)": 1368.22, + "Mean Recall": 0.9762, + "N Points in Bucket": 20 + }, + { + "SKU": "A100-SXM4-80GB", + "Hardware Type": "GPU", + "GPU": "A100-SXM4-80GB", + "CPU": "2x AMD EPYC 7742, 256 Threads, HT=On, Zen2 Rome", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 27.24, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 954, + "Mean Search Latency (ms)": 1878.792, + "Mean Recall": 0.997, + "N Points in Bucket": 21 + }, + { + "SKU": "A10G", + "Hardware Type": "GPU", + "GPU": "A10G", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 19.25, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 27700, + "Mean Search Latency (ms)": 5.851, + "Mean Recall": 0.9306, + "N Points in Bucket": 4 + }, + { + "SKU": "A10G", + "Hardware Type": "GPU", + "GPU": "A10G", + "CPU": "2x AMD EPYC 
7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 18.94, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 21962, + "Mean Search Latency (ms)": 6.799, + "Mean Recall": 0.9816, + "N Points in Bucket": 12 + }, + { + "SKU": "A10G", + "Hardware Type": "GPU", + "GPU": "A10G", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 21.85, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 7613, + "Mean Search Latency (ms)": 9.201, + "Mean Recall": 0.9967, + "N Points in Bucket": 31 + }, + { + "SKU": "A10G", + "Hardware Type": "GPU", + "GPU": "A10G", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 18.67, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 103569, + "Mean Search Latency (ms)": 6.504, + "Mean Recall": 0.9284, + "N Points in Bucket": 7 + }, + { + "SKU": "A10G", + "Hardware Type": "GPU", + "GPU": "A10G", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 20.23, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 59180, + "Mean Search Latency (ms)": 9.946, + "Mean Recall": 0.9835, + "N Points in Bucket": 7 + }, + { + "SKU": "A10G", + "Hardware Type": "GPU", + "GPU": "A10G", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", 
+ "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 21.01, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 14266, + "Mean Search Latency (ms)": 71.157, + "Mean Recall": 0.9967, + "N Points in Bucket": 24 + }, + { + "SKU": "A10G", + "Hardware Type": "GPU", + "GPU": "A10G", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 45.33, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 1313, + "Mean Search Latency (ms)": 3.499, + "Mean Recall": 0.9297, + "N Points in Bucket": 7 + }, + { + "SKU": "A10G", + "Hardware Type": "GPU", + "GPU": "A10G", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 26.85, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 960, + "Mean Search Latency (ms)": 3.308, + "Mean Recall": 0.9646, + "N Points in Bucket": 2 + }, + { + "SKU": "A10G", + "Hardware Type": "GPU", + "GPU": "A10G", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 27.54, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 320, + "Mean Search Latency (ms)": 356.949, + "Mean Recall": 0.996, + "N Points in Bucket": 7 + }, + { + "SKU": "A10G", + "Hardware Type": "GPU", + "GPU": "A10G", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + 
"Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 29.83, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 5633, + "Mean Search Latency (ms)": 6.365, + "Mean Recall": 0.9123, + "N Points in Bucket": 7 + }, + { + "SKU": "A10G", + "Hardware Type": "GPU", + "GPU": "A10G", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 27.55, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 2540, + "Mean Search Latency (ms)": 362.402, + "Mean Recall": 0.9744, + "N Points in Bucket": 17 + }, + { + "SKU": "A10G", + "Hardware Type": "GPU", + "GPU": "A10G", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 29.89, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 841, + "Mean Search Latency (ms)": 1737.822, + "Mean Recall": 0.9982, + "N Points in Bucket": 10 + }, + { + "SKU": "Genoa_9654", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x AMD EPYC 9654, 384 Threads, HT=On, Zen4 Genoa", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 89.29, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 27348, + "Mean Search Latency (ms)": 13.872, + "Mean Recall": 0.9302, + "N Points in Bucket": 9 + }, + { + "SKU": "Genoa_9654", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x AMD EPYC 9654, 384 Threads, HT=On, Zen4 Genoa", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": 
"[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 129.23, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 11269, + "Mean Search Latency (ms)": 42.858, + "Mean Recall": 0.9719, + "N Points in Bucket": 13 + }, + { + "SKU": "Genoa_9654", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x AMD EPYC 9654, 384 Threads, HT=On, Zen4 Genoa", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 185.3, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 2403, + "Mean Search Latency (ms)": 94.376, + "Mean Recall": 0.9954, + "N Points in Bucket": 21 + }, + { + "SKU": "Genoa_9654", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x AMD EPYC 9654, 384 Threads, HT=On, Zen4 Genoa", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 114.22, + "Search Batch Size": 1, + "TopK": 1000, + "Mean Search Throughput (QPS)": 2436, + "Mean Search Latency (ms)": 176.568, + "Mean Recall": 0.9413, + "N Points in Bucket": 3 + }, + { + "SKU": "Genoa_9654", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x AMD EPYC 9654, 384 Threads, HT=On, Zen4 Genoa", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 224.1, + "Search Batch Size": 1, + "TopK": 1000, + "Mean Search Throughput (QPS)": 1572, + "Mean Search Latency (ms)": 203.731, + "Mean Recall": 0.9563, + "N Points in Bucket": 4 + }, + { + "SKU": "Genoa_9654", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x AMD EPYC 9654, 384 Threads, HT=On, Zen4 Genoa", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + 
"Dimensions": 1024, + "Index Build Time (s)": 76.06, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 35837, + "Mean Search Latency (ms)": 123.799, + "Mean Recall": 0.9209, + "N Points in Bucket": 5 + }, + { + "SKU": "Genoa_9654", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x AMD EPYC 9654, 384 Threads, HT=On, Zen4 Genoa", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 98.4, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 10703, + "Mean Search Latency (ms)": 481.908, + "Mean Recall": 0.9795, + "N Points in Bucket": 11 + }, + { + "SKU": "Genoa_9654", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x AMD EPYC 9654, 384 Threads, HT=On, Zen4 Genoa", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 195.09, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 2410, + "Mean Search Latency (ms)": 751.321, + "Mean Recall": 0.9963, + "N Points in Bucket": 16 + }, + { + "SKU": "Genoa_9654", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x AMD EPYC 9654, 384 Threads, HT=On, Zen4 Genoa", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 135.66, + "Search Batch Size": 10, + "TopK": 1000, + "Mean Search Throughput (QPS)": 2352, + "Mean Search Latency (ms)": 1284.847, + "Mean Recall": 0.9424, + "N Points in Bucket": 3 + }, + { + "SKU": "Genoa_9654", + "Hardware Type": "CPU", + "GPU": "NA", + "CPU": "2x AMD EPYC 9654, 384 Threads, HT=On, Zen4 Genoa", + "dtype": "FP32", + "cuVS Algo": "hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 
223.32, + "Search Batch Size": 10, + "TopK": 1000, + "Mean Search Throughput (QPS)": 1233, + "Mean Search Latency (ms)": 1324.779, + "Mean Recall": 0.9561, + "N Points in Bucket": 6 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 10.86, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 38983, + "Mean Search Latency (ms)": 2.844, + "Mean Recall": 0.9131, + "N Points in Bucket": 1 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 10.17, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 35843, + "Mean Search Latency (ms)": 7.846, + "Mean Recall": 0.9709, + "N Points in Bucket": 4 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 10.74, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 15645, + "Mean Search Latency (ms)": 12.862, + "Mean Recall": 0.9974, + "N Points in Bucket": 42 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 
1024, + "Index Build Time (s)": 8.96, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 298758, + "Mean Search Latency (ms)": 13.914, + "Mean Recall": 0.9284, + "N Points in Bucket": 7 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 10.15, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 189566, + "Mean Search Latency (ms)": 13.806, + "Mean Recall": 0.9774, + "N Points in Bucket": 10 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 10.25, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 62458, + "Mean Search Latency (ms)": 114.422, + "Mean Recall": 0.9958, + "N Points in Bucket": 19 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 15.16, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 1144, + "Mean Search Latency (ms)": 0.902, + "Mean Recall": 0.9311, + "N Points in Bucket": 9 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": 
"[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 14.14, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 578, + "Mean Search Latency (ms)": 2.247, + "Mean Recall": 0.9747, + "N Points in Bucket": 19 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 14.57, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 188, + "Mean Search Latency (ms)": 259.522, + "Mean Recall": 0.9966, + "N Points in Bucket": 7 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 15.2, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 7895, + "Mean Search Latency (ms)": 1.332, + "Mean Recall": 0.9236, + "N Points in Bucket": 8 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 14.26, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 3856, + "Mean Search Latency (ms)": 2.898, + "Mean Recall": 0.9708, + "N Points in Bucket": 16 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": 
"cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 14.88, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 978, + "Mean Search Latency (ms)": 1493.781, + "Mean Recall": 0.997, + "N Points in Bucket": 31 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 64.97, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 1312, + "Mean Search Latency (ms)": 18.573, + "Mean Recall": 0.923, + "N Points in Bucket": 19 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 76.69, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 711, + "Mean Search Latency (ms)": 158.355, + "Mean Recall": 0.9724, + "N Points in Bucket": 16 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 101.42, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 316, + "Mean Search Latency (ms)": 648.719, + "Mean Recall": 0.9955, + "N Points in Bucket": 5 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", 
+ "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 64.92, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 1310, + "Mean Search Latency (ms)": 165.032, + "Mean Recall": 0.9273, + "N Points in Bucket": 12 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 70.42, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 657, + "Mean Search Latency (ms)": 484.856, + "Mean Recall": 0.9762, + "N Points in Bucket": 16 + }, + { + "SKU": "H100-SXM-80GB", + "Hardware Type": "GPU", + "GPU": "H100-SXM-80GB", + "CPU": "2x Intel 8480CL, 224 Threads, HT=On, Sapphire Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 102.77, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 294, + "Mean Search Latency (ms)": 898.319, + "Mean Recall": 0.9958, + "N Points in Bucket": 6 + }, + { + "SKU": "L4", + "Hardware Type": "GPU", + "GPU": "L4", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 23.73, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 30133, + "Mean Search Latency (ms)": 2.684, + "Mean Recall": 0.9317, + "N Points in Bucket": 4 + }, + { + "SKU": "L4", + "Hardware Type": "GPU", + "GPU": "L4", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", 
+ "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 25.09, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 19020, + "Mean Search Latency (ms)": 6.492, + "Mean Recall": 0.9788, + "N Points in Bucket": 18 + }, + { + "SKU": "L4", + "Hardware Type": "GPU", + "GPU": "L4", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 26.56, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 5911, + "Mean Search Latency (ms)": 8.01, + "Mean Recall": 0.9964, + "N Points in Bucket": 40 + }, + { + "SKU": "L4", + "Hardware Type": "GPU", + "GPU": "L4", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 21.82, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 74020, + "Mean Search Latency (ms)": 13.567, + "Mean Recall": 0.9272, + "N Points in Bucket": 2 + }, + { + "SKU": "L4", + "Hardware Type": "GPU", + "GPU": "L4", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 25.78, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 35277, + "Mean Search Latency (ms)": 21.027, + "Mean Recall": 0.984, + "N Points in Bucket": 11 + }, + { + "SKU": "L4", + "Hardware Type": "GPU", + "GPU": "L4", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": 
">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 26.88, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 6923, + "Mean Search Latency (ms)": 139.686, + "Mean Recall": 0.9969, + "N Points in Bucket": 25 + }, + { + "SKU": "L4", + "Hardware Type": "GPU", + "GPU": "L4", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 44.67, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 1301, + "Mean Search Latency (ms)": 2.316, + "Mean Recall": 0.9309, + "N Points in Bucket": 4 + }, + { + "SKU": "L4", + "Hardware Type": "GPU", + "GPU": "L4", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 36.47, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 629, + "Mean Search Latency (ms)": 7.378, + "Mean Recall": 0.9784, + "N Points in Bucket": 8 + }, + { + "SKU": "L4", + "Hardware Type": "GPU", + "GPU": "L4", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 34.82, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 280, + "Mean Search Latency (ms)": 318.926, + "Mean Recall": 0.9953, + "N Points in Bucket": 6 + }, + { + "SKU": "L4", + "Hardware Type": "GPU", + "GPU": "L4", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + 
"Dimensions": 1024, + "Index Build Time (s)": 38.16, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 5517, + "Mean Search Latency (ms)": 2.349, + "Mean Recall": 0.9253, + "N Points in Bucket": 7 + }, + { + "SKU": "L4", + "Hardware Type": "GPU", + "GPU": "L4", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 34.19, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 2523, + "Mean Search Latency (ms)": 269.387, + "Mean Recall": 0.9768, + "N Points in Bucket": 16 + }, + { + "SKU": "L4", + "Hardware Type": "GPU", + "GPU": "L4", + "CPU": "2x AMD EPYC 7763, 128 Threads, HT=Off, Zen3 Milan", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 37.04, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 923, + "Mean Search Latency (ms)": 1632.779, + "Mean Recall": 0.9971, + "N Points in Bucket": 23 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 10.05, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 38249, + "Mean Search Latency (ms)": 6.546, + "Mean Recall": 0.9804, + "N Points in Bucket": 9 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 
10.95, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 11870, + "Mean Search Latency (ms)": 25.83, + "Mean Recall": 0.9971, + "N Points in Bucket": 44 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 9.43, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 178441, + "Mean Search Latency (ms)": 18.652, + "Mean Recall": 0.9274, + "N Points in Bucket": 4 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 10.34, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 72111, + "Mean Search Latency (ms)": 22.845, + "Mean Recall": 0.9798, + "N Points in Bucket": 16 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 11.39, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 22046, + "Mean Search Latency (ms)": 753.079, + "Mean Recall": 0.9978, + "N Points in Bucket": 16 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 13.67, + "Search Batch Size": 1, + "TopK": 
10, + "Mean Search Throughput (QPS)": 1853, + "Mean Search Latency (ms)": 1.218, + "Mean Recall": 0.9268, + "N Points in Bucket": 3 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 13.63, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 671, + "Mean Search Latency (ms)": 2.822, + "Mean Recall": 0.973, + "N Points in Bucket": 17 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 13.7, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 226, + "Mean Search Latency (ms)": 412.092, + "Mean Recall": 0.9974, + "N Points in Bucket": 8 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 13.45, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 8547, + "Mean Search Latency (ms)": 1.346, + "Mean Recall": 0.9306, + "N Points in Bucket": 8 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 14.3, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput 
(QPS)": 3444, + "Mean Search Latency (ms)": 3.507, + "Mean Recall": 0.9791, + "N Points in Bucket": 19 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 15.92, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 1139, + "Mean Search Latency (ms)": 1455.341, + "Mean Recall": 0.9972, + "N Points in Bucket": 18 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 114.07, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 1464, + "Mean Search Latency (ms)": 25.114, + "Mean Recall": 0.9267, + "N Points in Bucket": 12 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 179.91, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 759, + "Mean Search Latency (ms)": 136.958, + "Mean Recall": 0.9756, + "N Points in Bucket": 17 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 219.95, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 352, + "Mean Search Latency (ms)": 
462.686, + "Mean Recall": 0.9947, + "N Points in Bucket": 10 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 157.68, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 1496, + "Mean Search Latency (ms)": 45.385, + "Mean Recall": 0.9265, + "N Points in Bucket": 13 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 154.35, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 770, + "Mean Search Latency (ms)": 454.886, + "Mean Recall": 0.9757, + "N Points in Bucket": 13 + }, + { + "SKU": "L40S", + "Hardware Type": "GPU", + "GPU": "L40S", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 258.2, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 323, + "Mean Search Latency (ms)": 1069.649, + "Mean Recall": 0.9956, + "N Points in Bucket": 6 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 13.28, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 34776, + "Mean Search Latency (ms)": 6.423, + "Mean 
Recall": 0.9855, + "N Points in Bucket": 8 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 13.96, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 11716, + "Mean Search Latency (ms)": 12.764, + "Mean Recall": 0.9972, + "N Points in Bucket": 37 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 12.88, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 176395, + "Mean Search Latency (ms)": 10.523, + "Mean Recall": 0.93, + "N Points in Bucket": 5 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 13.88, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 84601, + "Mean Search Latency (ms)": 15.826, + "Mean Recall": 0.9747, + "N Points in Bucket": 15 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 14.46, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search 
Throughput (QPS)": 26068, + "Mean Search Latency (ms)": 897.21, + "Mean Recall": 0.9977, + "N Points in Bucket": 16 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 16.97, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 1277, + "Mean Search Latency (ms)": 0.941, + "Mean Recall": 0.9311, + "N Points in Bucket": 5 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 16.43, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 567, + "Mean Search Latency (ms)": 2.094, + "Mean Recall": 0.9757, + "N Points in Bucket": 18 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 17.75, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 211, + "Mean Search Latency (ms)": 277.42, + "Mean Recall": 0.9957, + "N Points in Bucket": 8 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, 
+ "Index Build Time (s)": 16.47, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 8630, + "Mean Search Latency (ms)": 1.192, + "Mean Recall": 0.9216, + "N Points in Bucket": 6 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 17.1, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 4009, + "Mean Search Latency (ms)": 2.832, + "Mean Recall": 0.9721, + "N Points in Bucket": 19 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 18.16, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 1114, + "Mean Search Latency (ms)": 562.567, + "Mean Recall": 0.9966, + "N Points in Bucket": 23 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 159.31, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 1502, + "Mean Search Latency (ms)": 23.332, + "Mean Recall": 0.9233, + "N Points in Bucket": 14 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", 
+ "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 205.73, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 758, + "Mean Search Latency (ms)": 91.43, + "Mean Recall": 0.9767, + "N Points in Bucket": 11 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 302.47, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 323, + "Mean Search Latency (ms)": 566.174, + "Mean Recall": 0.9972, + "N Points in Bucket": 6 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 151.36, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 1537, + "Mean Search Latency (ms)": 41.827, + "Mean Recall": 0.923, + "N Points in Bucket": 11 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 196.79, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 832, + "Mean Search Latency (ms)": 432.442, + "Mean Recall": 0.9729, + "N Points in Bucket": 15 + }, + { + "SKU": "RTX_PRO_4500_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 4500 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + 
"dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 235.94, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 347, + "Mean Search Latency (ms)": 756.032, + "Mean Recall": 0.9942, + "N Points in Bucket": 9 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 7.35, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 40788, + "Mean Search Latency (ms)": 7.513, + "Mean Recall": 0.9323, + "N Points in Bucket": 1 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 7.35, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 39554, + "Mean Search Latency (ms)": 5.816, + "Mean Recall": 0.9823, + "N Points in Bucket": 3 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 8.6, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 15196, + "Mean Search Latency (ms)": 13.229, + "Mean Recall": 0.9976, + "N Points in Bucket": 32 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": 
"2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 9.15, + "Search Batch Size": 1, + "TopK": 1000, + "Mean Search Throughput (QPS)": 19355, + "Mean Search Latency (ms)": 5.054, + "Mean Recall": 0.906, + "N Points in Bucket": 7 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 8.79, + "Search Batch Size": 1, + "TopK": 1000, + "Mean Search Throughput (QPS)": 9418, + "Mean Search Latency (ms)": 24.15, + "Mean Recall": 0.9823, + "N Points in Bucket": 10 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 9.32, + "Search Batch Size": 1, + "TopK": 1000, + "Mean Search Throughput (QPS)": 3980, + "Mean Search Latency (ms)": 17.687, + "Mean Recall": 0.9936, + "N Points in Bucket": 6 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 8.04, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 313550, + "Mean Search Latency (ms)": 14.533, + "Mean Recall": 0.9291, + "N Points in Bucket": 4 + }, + { + "SKU": "RTX_PRO_6000_BSE", + 
"Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 8.28, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 129726, + "Mean Search Latency (ms)": 14.584, + "Mean Recall": 0.9807, + "N Points in Bucket": 18 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 8.57, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 52490, + "Mean Search Latency (ms)": 127.814, + "Mean Recall": 0.9949, + "N Points in Bucket": 12 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 10.49, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 1958, + "Mean Search Latency (ms)": 1.035, + "Mean Recall": 0.9321, + "N Points in Bucket": 1 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 11.56, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 936, + "Mean Search Latency (ms)": 2.506, + "Mean 
Recall": 0.9696, + "N Points in Bucket": 8 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 11.78, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 226, + "Mean Search Latency (ms)": 153.373, + "Mean Recall": 0.997, + "N Points in Bucket": 9 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 11.84, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 8911, + "Mean Search Latency (ms)": 1.152, + "Mean Recall": 0.9205, + "N Points in Bucket": 5 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 12.82, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 3508, + "Mean Search Latency (ms)": 3.423, + "Mean Recall": 0.9774, + "N Points in Bucket": 17 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_cagra_hnswlib", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 12.9, + "Search Batch Size": 10, + "TopK": 10, 
+ "Mean Search Throughput (QPS)": 1178, + "Mean Search Latency (ms)": 1561.954, + "Mean Recall": 0.9971, + "N Points in Bucket": 28 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 90.79, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 1496, + "Mean Search Latency (ms)": 17.152, + "Mean Recall": 0.9239, + "N Points in Bucket": 10 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 102.79, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 793, + "Mean Search Latency (ms)": 109.882, + "Mean Recall": 0.9752, + "N Points in Bucket": 15 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 101.19, + "Search Batch Size": 1, + "TopK": 10, + "Mean Search Throughput (QPS)": 359, + "Mean Search Latency (ms)": 269.405, + "Mean Recall": 0.9938, + "N Points in Bucket": 9 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": "[90-95%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + 
"Index Build Time (s)": 96.62, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 1554, + "Mean Search Latency (ms)": 103.732, + "Mean Recall": 0.9244, + "N Points in Bucket": 9 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": "[95-99%)", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 84.54, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 841, + "Mean Search Latency (ms)": 254.994, + "Mean Recall": 0.9728, + "N Points in Bucket": 13 + }, + { + "SKU": "RTX_PRO_6000_BSE", + "Hardware Type": "GPU", + "GPU": "RTX PRO 6000 Blackwell", + "CPU": "2x Intel 8570, 224 Threads, HT=On, Emerald Rapids", + "dtype": "FP32", + "cuVS Algo": "cuvs_vamana", + "Mode": "throughput", + "Recall Range": ">=99%", + "Total Vectors": 1000000, + "Dimensions": 1024, + "Index Build Time (s)": 119.69, + "Search Batch Size": 10, + "TopK": 10, + "Mean Search Throughput (QPS)": 343, + "Mean Search Latency (ms)": 782.166, + "Mean Recall": 0.995, + "N Points in Bucket": 8 + } +]; diff --git a/fern/theme/nvidia/components/performanceDashboardLogic.ts b/fern/theme/nvidia/components/performanceDashboardLogic.ts new file mode 100644 index 0000000000..f6a66c88e2 --- /dev/null +++ b/fern/theme/nvidia/components/performanceDashboardLogic.ts @@ -0,0 +1,433 @@ +/** + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. 
/* SPDX-License-Identifier: Apache-2.0 */

import type { BenchmarkRow } from "./benchmarkData";

export type { BenchmarkRow };

/** Identifier of one filter dimension understood by the functions in this module. */
export type FilterKey =
  | "algo"
  | "gpu"
  | "cpu"
  | "bs"
  | "topk"
  | "totalVectors"
  | "dims"
  | "dtype"
  | "mode";

/** Bar ordering inside each recall bucket: input order, or ranked by metric value. */
export type SortMode = "default" | "desc" | "asc";

/** Recall buckets in display order; values match the rows' "Recall Range" column. */
export const BUCKET_ORDER = ["[90-95%)", "[95-99%)", ">=99%"] as const;

/** Short axis label for each recall bucket. */
export const BUCKET_LABEL: Record<string, string> = {
  "[90-95%)": "90%",
  "[95-99%)": "95%",
  ">=99%": "99%",
};

/** Every filter key, in a fixed order. */
export const FILTER_KEYS: FilterKey[] = [
  "algo",
  "gpu",
  "cpu",
  "bs",
  "topk",
  "totalVectors",
  "dims",
  "dtype",
  "mode",
];

/** Row columns listed for the results table, in column order. */
export const TABLE_COLS = [
  "SKU",
  "Hardware Type",
  "GPU",
  "CPU",
  "cuVS Algo",
  "Mode",
  "Recall Range",
  "Search Batch Size",
  "TopK",
  "Index Build Time (s)",
  "Mean Search Throughput (QPS)",
  "Mean Search Latency (ms)",
  "Mean Recall",
  "N Points in Bucket",
];

// Palette used for series whose hardware type is "GPU".
const NV_GREEN_SHADES = [
  "#76B900",
  "#558700",
  "#3E6600",
  "#A3D82C",
  "#C7E867",
  "#8FC733",
  "#2A4A00",
  "#DFF1A8",
];

// Palette used for every other (CPU) series.
const INTEL_BLUE_SHADES = [
  "#0071C5",
  "#004F8F",
  "#00355E",
  "#3393D6",
  "#66AEE0",
  "#99C9EA",
  "#002038",
  "#CCE4F4",
];

/** Shortens a GPU name: " Blackwell" becomes " BSE" and memory/form-factor suffixes are dropped. */
function shortGpu(g: string | number | null): string {
  return String(g)
    .replace(" Blackwell", " BSE")
    .replace("-SXM4-80GB", "")
    .replace("-SXM-80GB", "")
    .replace("-80GB", "");
}

/**
 * Shortens a CPU description to the text before the first comma, without a
 * leading "<n>x " socket-count prefix. Falls back to the full string when the
 * pattern does not match.
 */
function shortCpu(c: string | number | null): string {
  const match = String(c).match(/^(\d+x\s+)?([^,]+)/);
  return match ? match[2].trim() : String(c);
}

/**
 * Picks a palette colour for a series from a hash of `key`, so the same key
 * always maps to the same colour.
 */
function colorFor(key: string, isGpu: boolean): string {
  const palette = isGpu ? NV_GREEN_SHADES : INTEL_BLUE_SHADES;
  let hash = 0;
  for (let i = 0; i < key.length; i++) {
    // `| 0` truncates the running hash to a signed 32-bit integer.
    hash = (hash * 31 + key.charCodeAt(i)) | 0;
  }
  return palette[Math.abs(hash) % palette.length];
}

/**
 * Returns the first occurrence of each distinct value (compared by its string
 * form), dropping values whose string form is "" or "NA".
 */
function uniq<T>(values: T[]): T[] {
  const seen = new Set<string>();
  const out: T[] = [];
  for (const value of values) {
    const key = String(value);
    if (!seen.has(key) && key !== "" && key !== "NA") {
      seen.add(key);
      out.push(value);
    }
  }
  return out;
}

/**
 * Formats one filter value for display.
 *
 * @param filterKey - Filter dimension the value belongs to.
 * @param value - Raw value as stored in the benchmark rows.
 * @returns Shortened hardware name, `bs=`/`k=` prefixed number, vector count
 *   scaled to K/M/B, `<n>D` for dimensions, or the plain string otherwise.
 */
export function labelFor(filterKey: FilterKey, value: string | number): string {
  if (filterKey === "gpu") return shortGpu(value);
  if (filterKey === "cpu") return shortCpu(value);
  if (filterKey === "bs") return `bs=${value}`;
  if (filterKey === "topk") return `k=${value}`;
  if (filterKey === "totalVectors") {
    const n = Number(value);
    if (n >= 1e9) return `${n / 1e9}B`;
    if (n >= 1e6) return `${n / 1e6}M`;
    if (n >= 1e3) return `${n / 1e3}K`;
    return String(value);
  }
  if (filterKey === "dims") return `${value}D`;
  return String(value);
}

/** Returns the plural noun used to describe a filter dimension. */
export function labelPlural(filterKey: FilterKey): string {
  const labels: Record<FilterKey, string> = {
    algo: "algorithms",
    gpu: "GPUs",
    cpu: "CPUs",
    bs: "batch sizes",
    topk: "TopK values",
    totalVectors: "sizes",
    dims: "dim sizes",
    dtype: "dtypes",
    mode: "modes",
  };
  return labels[filterKey];
}

/**
 * Collects the distinct values available for every filter from `rows`.
 *
 * GPU options come only from rows whose hardware type is "GPU" and CPU options
 * only from rows whose hardware type is "CPU". Text options are sorted with the
 * default string order, numeric options ascending.
 */
export function buildFilterOptions(
  rows: BenchmarkRow[],
): Record<FilterKey, (string | number)[]> {
  return {
    algo: uniq(rows.map((r) => r["cuVS Algo"])).sort() as string[],
    gpu: uniq(
      rows.filter((r) => r["Hardware Type"] === "GPU").map((r) => r["GPU"]),
    ).sort() as string[],
    cpu: uniq(
      rows.filter((r) => r["Hardware Type"] === "CPU").map((r) => r["CPU"]),
    ).sort() as string[],
    bs: uniq(rows.map((r) => r["Search Batch Size"])).sort(
      (a, b) => Number(a) - Number(b),
    ) as number[],
    topk: uniq(rows.map((r) => r["TopK"])).sort(
      (a, b) => Number(a) - Number(b),
    ) as number[],
    totalVectors: uniq(rows.map((r) => r["Total Vectors"])).sort(
      (a, b) => Number(a) - Number(b),
    ) as number[],
    dims: uniq(rows.map((r) => r["Dimensions"])).sort(
      (a, b) => Number(a) - Number(b),
    ) as number[],
    dtype: uniq(rows.map((r) => r["dtype"])).sort() as string[],
    mode: uniq(rows.map((r) => r["Mode"])).sort() as string[],
  };
}

/**
 * Builds the initial selection: the first available option of every filter,
 * or an empty selection for a filter that has no options.
 */
export function defaultFilters(
  options: Record<FilterKey, (string | number)[]>,
): Record<FilterKey, (string | number)[]> {
  const first = (arr: (string | number)[]) => (arr.length ? [arr[0]] : []);
  return {
    algo: first(options.algo),
    gpu: first(options.gpu),
    cpu: first(options.cpu),
    bs: first(options.bs),
    topk: first(options.topk),
    totalVectors: first(options.totalVectors),
    dims: first(options.dims),
    dtype: first(options.dtype),
    mode: first(options.mode),
  };
}

/**
 * Keeps the rows matched by every filter.
 *
 * A row must match all non-hardware filters; it is then checked against the
 * GPU selection when its hardware type is "GPU" and against the CPU selection
 * otherwise.
 */
export function applyFilters(
  rows: BenchmarkRow[],
  filters: Record<FilterKey, (string | number)[]>,
): BenchmarkRow[] {
  return rows.filter((row) => {
    if (!filters.algo.includes(row["cuVS Algo"] as string | number)) return false;
    if (!filters.bs.includes(row["Search Batch Size"] as string | number)) return false;
    if (!filters.topk.includes(row["TopK"] as string | number)) return false;
    if (!filters.totalVectors.includes(row["Total Vectors"] as string | number)) {
      return false;
    }
    if (!filters.dims.includes(row["Dimensions"] as string | number)) return false;
    if (!filters.dtype.includes(row["dtype"] as string | number)) return false;
    if (!filters.mode.includes(row["Mode"] as string | number)) return false;
    if (row["Hardware Type"] === "GPU") {
      return filters.gpu.includes(row["GPU"] as string | number);
    }
    return filters.cpu.includes(row["CPU"] as string | number);
  });
}

/** Legend entry describing one series. */
export interface SeriesLegendItem {
  label: string;
  color: string;
  isGpu: boolean;
}

/**
 * One chart dataset with one value per recall bucket.
 *
 * In sorted mode a dataset is a rank slot rather than a series: colours are
 * per-bucket arrays and `_seriesMeta` names the series occupying each bucket.
 */
export interface ChartDataset {
  label: string;
  backgroundColor: string | string[];
  borderColor: string | string[];
  borderWidth: number;
  data: (number | null)[];
  _seriesMeta?: { label: string; key: string | null }[];
  _sortedSlot?: boolean;
}

/** Result of {@link buildChartData}. */
export interface ChartDataResult {
  labels: string[];
  datasets: ChartDataset[];
  sorted: boolean;
  allSeries: SeriesLegendItem[];
  anchorBucket?: string;
}

/**
 * Groups rows into series and lays them out across the recall buckets.
 *
 * A series is identified by hardware type, shortened hardware name, algorithm,
 * batch size and TopK. When several rows fall into the same series and bucket,
 * the last one wins. Rows whose metric is missing or not a finite number still
 * create their series but contribute no value, so the bucket is reported as
 * `null` instead of a misleading `0`/`NaN` bar.
 *
 * @param rows - Rows to chart (normally already filtered).
 * @param metricCol - Name of the row column holding the metric to plot.
 * @param sortMode - `"default"` emits one dataset per series; `"asc"`/`"desc"`
 *   emit one dataset per rank slot, ranking the series independently inside
 *   each bucket.
 * @returns Labels, datasets and legend entries. In sorted mode the legend is
 *   ordered by the ranking of the first bucket that has data (`anchorBucket`),
 *   followed by series absent from that bucket.
 */
export function buildChartData(
  rows: BenchmarkRow[],
  metricCol: string,
  sortMode: SortMode = "default",
): ChartDataResult {
  const seriesMap: Record<
    string,
    {
      label: string;
      color: string;
      isGpu: boolean;
      data: Record<string, number>;
    }
  > = {};

  for (const row of rows) {
    const isGpu = row["Hardware Type"] === "GPU";
    const hw = isGpu ? shortGpu(row["GPU"]) : shortCpu(row["CPU"]);
    // The "A-GPU"/"B-CPU" prefix makes GPU series sort ahead of CPU series.
    const key =
      `${isGpu ? "A-GPU" : "B-CPU"}|${hw}|${row["cuVS Algo"]}` +
      `|bs${row["Search Batch Size"]}|k${row["TopK"]}`;
    if (!seriesMap[key]) {
      seriesMap[key] = {
        label:
          `${hw} · ${row["cuVS Algo"]} · bs=${row["Search Batch Size"]} · k=${row["TopK"]}`,
        color: colorFor(
          `${hw}${row["cuVS Algo"]}${row["Search Batch Size"]}${row["TopK"]}`,
          isGpu,
        ),
        isGpu,
        data: {},
      };
    }
    const raw: unknown = row[metricCol];
    const value = Number(raw);
    // Number(null) and Number("") are 0 and Number("NA") is NaN; none of those
    // is a measurement, so leave the bucket empty rather than plot them.
    if (raw != null && raw !== "" && Number.isFinite(value)) {
      seriesMap[key].data[String(row["Recall Range"])] = value;
    }
  }

  const keys = Object.keys(seriesMap).sort();
  const labels = BUCKET_ORDER.map((bucket) => BUCKET_LABEL[bucket]);
  const allSeries = keys.map((key) => {
    const series = seriesMap[key];
    return { label: series.label, color: series.color, isGpu: series.isGpu };
  });

  if (sortMode === "default") {
    return {
      labels,
      sorted: false,
      allSeries,
      datasets: keys.map((key) => {
        const series = seriesMap[key];
        return {
          label: series.label,
          backgroundColor: series.color,
          borderColor: series.color,
          borderWidth: 1,
          data: BUCKET_ORDER.map((bucket) => series.data[bucket] ?? null),
        };
      }),
    };
  }

  // Rank the series independently inside every bucket.
  const perBucketRank: Record<string, { key: string; value: number }[]> = {};
  for (const bucket of BUCKET_ORDER) {
    const entries: { key: string; value: number }[] = [];
    for (const key of keys) {
      const value = seriesMap[key].data[bucket];
      if (value != null) entries.push({ key, value });
    }
    entries.sort((a, b) =>
      sortMode === "desc" ? b.value - a.value : a.value - b.value,
    );
    perBucketRank[bucket] = entries;
  }

  let maxN = 0;
  for (const bucket of BUCKET_ORDER) {
    maxN = Math.max(maxN, perBucketRank[bucket].length);
  }

  // One dataset per rank slot; each slot may hold a different series per bucket.
  const datasets: ChartDataset[] = [];
  for (let slot = 0; slot < maxN; slot++) {
    const dataArr: (number | null)[] = [];
    const bgArr: string[] = [];
    const metaArr: { label: string; key: string | null }[] = [];
    for (const bucket of BUCKET_ORDER) {
      const entry = perBucketRank[bucket][slot];
      if (entry) {
        dataArr.push(entry.value);
        bgArr.push(seriesMap[entry.key].color);
        metaArr.push({ label: seriesMap[entry.key].label, key: entry.key });
      } else {
        dataArr.push(null);
        bgArr.push("#ccc");
        metaArr.push({ label: "(no data)", key: null });
      }
    }
    datasets.push({
      label: `Rank ${slot + 1}`,
      backgroundColor: bgArr,
      borderColor: bgArr,
      borderWidth: 1,
      data: dataArr,
      _seriesMeta: metaArr,
      _sortedSlot: true,
    });
  }

  // The first bucket that has any data decides the legend order.
  let anchorBucket: string | undefined;
  for (const bucket of BUCKET_ORDER) {
    if (perBucketRank[bucket].length > 0) {
      anchorBucket = bucket;
      break;
    }
  }

  let orderedSeries = allSeries;
  if (anchorBucket) {
    const orderByKey: Record<string, number> = {};
    perBucketRank[anchorBucket].forEach((entry, idx) => {
      orderByKey[entry.key] = idx;
    });
    const ranked = keys
      .filter((key) => orderByKey[key] != null)
      .sort((a, b) => orderByKey[a] - orderByKey[b]);
    const stragglers = keys.filter((key) => orderByKey[key] == null);
    orderedSeries = [...ranked, ...stragglers].map((key) => ({
      label: seriesMap[key].label,
      color: seriesMap[key].color,
      isGpu: seriesMap[key].isGpu,
    }));
  }

  return {
    labels,
    datasets,
    sorted: true,
    allSeries: orderedSeries,
    anchorBucket,
  };
}

/**
 * Summarises the current selection as a short " · "-separated string.
 *
 * A filter contributes nothing when all of its options are selected, its
 * "none" label when nothing is selected, and the selected values otherwise.
 *
 * @returns The summary, or "all rows (unfiltered)" when no filter contributes.
 */
export function describeFilterSelection(
  filters: Record<FilterKey, (string | number)[]>,
  options: Record<FilterKey, (string | number)[]>,
): string {
  const parts: string[] = [];

  const describe = (
    key: FilterKey,
    emptyLabel: string,
    format: (values: (string | number)[]) => string,
  ) => {
    if (filters[key].length === 0) parts.push(emptyLabel);
    else if (filters[key].length < options[key].length) parts.push(format(filters[key]));
  };

  describe("algo", "no algos", (values) => values.join(", "));
  describe("bs", "no bs", (values) => `bs=${values.join("/")}`);
  describe("topk", "no k", (values) => `k=${values.join("/")}`);
  describe("totalVectors", "no dataset size", (values) =>
    values.map((value) => labelFor("totalVectors", value)).join("/"),
  );
  describe("dims", "no dims", (values) => values.map((value) => `${value}D`).join("/"));
  describe("dtype", "no dtype", (values) => values.join("/"));
  describe("mode", "no mode", (values) => values.join("/"));

  if (filters.gpu.length && filters.gpu.length < options.gpu.length) {
    parts.push(`GPUs: ${filters.gpu.map((value) => labelFor("gpu", value)).join(", ")}`);
  } else if (filters.gpu.length === 0) {
    parts.push("GPUs: none");
  }

  if (filters.cpu.length && filters.cpu.length < options.cpu.length) {
    parts.push(`CPUs: ${filters.cpu.map((value) => labelFor("cpu", value)).join(", ")}`);
  } else if (filters.cpu.length === 0) {
    parts.push("CPUs: none");
  }

  return parts.length ? parts.join(" · ") : "all rows (unfiltered)";
}

/**
 * Formats a bar value compactly: "M"/"K" suffixes from one million/one
 * thousand upwards, and fewer decimals the larger the magnitude.
 *
 * @returns The formatted value, or "" for `null`/`undefined`/`NaN`.
 */
export function fmtBar(value: number | null): string {
  if (value == null || Number.isNaN(value)) return "";
  const abs = Math.abs(value);
  if (abs >= 1_000_000) return `${(value / 1_000_000).toFixed(abs >= 10_000_000 ? 0 : 1)}M`;
  if (abs >= 1_000) return `${(value / 1_000).toFixed(abs >= 10_000 ? 0 : 1)}K`;
  if (abs >= 100) return value.toFixed(0);
  if (abs >= 10) return value.toFixed(1);
  return value.toFixed(2);
}

/** Compares two cell values by their string form. */
function compareText(a: unknown, b: unknown): number {
  return String(a).localeCompare(String(b));
}

/** Compares two cell values numerically; non-numeric cells sort after numeric ones. */
function compareNumeric(a: unknown, b: unknown): number {
  const x = Number(a);
  const y = Number(b);
  const xOk = Number.isFinite(x);
  const yOk = Number.isFinite(y);
  if (xOk && yOk) return x - y;
  if (xOk !== yOk) return xOk ? -1 : 1;
  return compareText(a, b);
}

/** Compares two recall buckets by their position in BUCKET_ORDER; unknown buckets sort last. */
function compareBucket(a: unknown, b: unknown): number {
  const order: readonly string[] = BUCKET_ORDER;
  const rank = (bucket: unknown): number => {
    const idx = order.indexOf(String(bucket));
    return idx === -1 ? order.length : idx;
  };
  return rank(a) - rank(b) || compareText(a, b);
}

/**
 * Returns a sorted copy of `rows` for the results table.
 *
 * Rows are ordered by hardware type, algorithm and SKU (as text), then by
 * batch size and TopK (numerically), then by recall bucket (in BUCKET_ORDER).
 * Each column is compared on its own: joining the columns into one string
 * would sort numbers lexicographically and blur column boundaries, e.g.
 * placing bs=10/k=10 ahead of bs=1/k=1000. The input array is not modified.
 */
export function sortTableRows(rows: BenchmarkRow[]): BenchmarkRow[] {
  return [...rows].sort(
    (a, b) =>
      compareText(a["Hardware Type"], b["Hardware Type"]) ||
      compareText(a["cuVS Algo"], b["cuVS Algo"]) ||
      compareText(a["SKU"], b["SKU"]) ||
      compareNumeric(a["Search Batch Size"], b["Search Batch Size"]) ||
      compareNumeric(a["TopK"], b["TopK"]) ||
      compareBucket(a["Recall Range"], b["Recall Range"]),
  );
}